changeset 1:c1e1428eff7c

Preliminary port to the MIPS architecture. With this commit, the interpreter passes 140/141 regression tests, 8/8 SPECjvm98 tests, and 31/37 SPECjvm2008 tests. The compiler passes 136/141 regression tests, but it cannot yet run the SPECjvm98 and SPECjvm2008 benchmarks.
author LIU Qi <liuqi82@gmail.com>
date Thu, 30 Sep 2010 13:48:16 +0800
parents 51fd9f58216a
children d9ead3bdbf5b
files env_debug.sh env_product.sh hotspot/make/Makefile hotspot/make/defs.make hotspot/make/linux/makefiles/buildtree.make hotspot/make/linux/makefiles/debug.make hotspot/make/linux/makefiles/gcc.make hotspot/make/linux/makefiles/mips.make hotspot/make/linux/makefiles/mips64.make hotspot/make/linux/makefiles/vm.make hotspot/make/linux/platform_mips hotspot/src/cpu/mips/vm/assembler_mips.cpp hotspot/src/cpu/mips/vm/assembler_mips.hpp hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp hotspot/src/cpu/mips/vm/bytecodes_mips.cpp hotspot/src/cpu/mips/vm/bytecodes_mips.hpp hotspot/src/cpu/mips/vm/bytes_mips.hpp hotspot/src/cpu/mips/vm/c1_CodeStubs_mips.cpp hotspot/src/cpu/mips/vm/c1_Defs_mips.hpp hotspot/src/cpu/mips/vm/c1_FpuStackSim_mips.cpp hotspot/src/cpu/mips/vm/c1_FpuStackSim_mips.hpp hotspot/src/cpu/mips/vm/c1_FrameMap_mips.cpp hotspot/src/cpu/mips/vm/c1_FrameMap_mips.hpp hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.cpp hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.hpp hotspot/src/cpu/mips/vm/c1_LIRGenerator_mips.cpp hotspot/src/cpu/mips/vm/c1_LinearScan_mips.cpp hotspot/src/cpu/mips/vm/c1_LinearScan_mips.hpp hotspot/src/cpu/mips/vm/c1_MacroAssembler_mips.cpp hotspot/src/cpu/mips/vm/c1_MacroAssembler_mips.hpp hotspot/src/cpu/mips/vm/c1_Runtime1_mips.cpp hotspot/src/cpu/mips/vm/c1_globals_mips.hpp hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp hotspot/src/cpu/mips/vm/copy_mips.hpp hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp hotspot/src/cpu/mips/vm/debug_mips.cpp hotspot/src/cpu/mips/vm/depChecker_mips.cpp hotspot/src/cpu/mips/vm/depChecker_mips.hpp hotspot/src/cpu/mips/vm/disassembler_mips.cpp hotspot/src/cpu/mips/vm/disassembler_mips.hpp hotspot/src/cpu/mips/vm/dump_mips.cpp hotspot/src/cpu/mips/vm/frame_mips.cpp hotspot/src/cpu/mips/vm/frame_mips.hpp hotspot/src/cpu/mips/vm/frame_mips.inline.hpp hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp hotspot/src/cpu/mips/vm/globals_mips.hpp hotspot/src/cpu/mips/vm/icBuffer_mips.cpp hotspot/src/cpu/mips/vm/icache_mips.cpp hotspot/src/cpu/mips/vm/icache_mips.hpp hotspot/src/cpu/mips/vm/interp_masm_mips.cpp hotspot/src/cpu/mips/vm/interp_masm_mips.hpp hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp hotspot/src/cpu/mips/vm/interpreterRT_mips.cpp hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp hotspot/src/cpu/mips/vm/interpreter_mips.cpp hotspot/src/cpu/mips/vm/interpreter_mips.hpp hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp hotspot/src/cpu/mips/vm/jniFastGetField_mips.cpp hotspot/src/cpu/mips/vm/jniTypes_mips.hpp hotspot/src/cpu/mips/vm/jni_mips.h hotspot/src/cpu/mips/vm/mips.ad hotspot/src/cpu/mips/vm/nativeInst_mips.cpp hotspot/src/cpu/mips/vm/nativeInst_mips.hpp hotspot/src/cpu/mips/vm/nmethod_mips.hpp hotspot/src/cpu/mips/vm/registerMap_mips.hpp hotspot/src/cpu/mips/vm/register_definitions_mips.cpp hotspot/src/cpu/mips/vm/register_mips.cpp hotspot/src/cpu/mips/vm/register_mips.hpp hotspot/src/cpu/mips/vm/relocInfo_mips.cpp hotspot/src/cpu/mips/vm/relocInfo_mips.hpp hotspot/src/cpu/mips/vm/sharedRuntime_mips.cpp hotspot/src/cpu/mips/vm/stubGenerator_mips.cpp hotspot/src/cpu/mips/vm/stubRoutines_mips.cpp hotspot/src/cpu/mips/vm/stubRoutines_mips.hpp hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp hotspot/src/cpu/mips/vm/templateInterpreter_mips.cpp 
hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp hotspot/src/cpu/mips/vm/templateTable_mips.cpp hotspot/src/cpu/mips/vm/templateTable_mips.hpp hotspot/src/cpu/mips/vm/vmStructs_mips.hpp hotspot/src/cpu/mips/vm/vm_version_mips.cpp hotspot/src/cpu/mips/vm/vm_version_mips.hpp hotspot/src/cpu/mips/vm/vmreg_mips.cpp hotspot/src/cpu/mips/vm/vmreg_mips.hpp hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp hotspot/src/cpu/mips/vm/vtableStubs_mips.cpp hotspot/src/cpu/x86/vm/assembler_x86.cpp hotspot/src/os/linux/launcher/java_md.c hotspot/src/os/linux/vm/os_linux.cpp hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad hotspot/src/os_cpu/linux_mips/vm/linux_mips.s hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp hotspot/src/share/vm/asm/codeBuffer.cpp hotspot/src/share/vm/asm/register.hpp hotspot/src/share/vm/c1/c1_CodeStubs.hpp hotspot/src/share/vm/c1/c1_Compilation.cpp hotspot/src/share/vm/c1/c1_FrameMap.hpp hotspot/src/share/vm/c1/c1_LIR.cpp hotspot/src/share/vm/c1/c1_LIR.hpp hotspot/src/share/vm/c1/c1_LIRAssembler.cpp hotspot/src/share/vm/c1/c1_LIRGenerator.cpp hotspot/src/share/vm/c1/c1_LIRGenerator.hpp hotspot/src/share/vm/c1/c1_LinearScan.cpp hotspot/src/share/vm/c1/c1_Runtime1.cpp hotspot/src/share/vm/c1/c1_globals.hpp hotspot/src/share/vm/ci/ciEnv.cpp hotspot/src/share/vm/code/compiledIC.cpp hotspot/src/share/vm/code/nmethod.cpp hotspot/src/share/vm/code/relocInfo.cpp hotspot/src/share/vm/code/relocInfo.hpp hotspot/src/share/vm/compiler/cha.hpp hotspot/src/share/vm/compiler/disassemblerEnv.hpp hotspot/src/share/vm/includeDB_compiler1 hotspot/src/share/vm/includeDB_core hotspot/src/share/vm/interpreter/cInterpretMethod.hpp hotspot/src/share/vm/interpreter/cInterpreter.hpp hotspot/src/share/vm/interpreter/cInterpreter.inline.hpp hotspot/src/share/vm/interpreter/interpreterRuntime.cpp hotspot/src/share/vm/interpreter/interpreterRuntime.hpp hotspot/src/share/vm/interpreter/linkResolver.cpp hotspot/src/share/vm/interpreter/oopMapCache.cpp hotspot/src/share/vm/memory/defNewGeneration.cpp hotspot/src/share/vm/memory/referenceProcessorMT.hpp hotspot/src/share/vm/oops/constantPoolOop.cpp hotspot/src/share/vm/oops/constantPoolOop.hpp hotspot/src/share/vm/oops/instanceKlass.cpp hotspot/src/share/vm/oops/methodOop.cpp hotspot/src/share/vm/oops/oop.cpp hotspot/src/share/vm/oops/oop.inline.hpp hotspot/src/share/vm/prims/jvm.cpp hotspot/src/share/vm/prims/jvmpi.cpp hotspot/src/share/vm/prims/jvmpi.h hotspot/src/share/vm/prims/jvmpi.hpp hotspot/src/share/vm/prims/jvmpi.inline.hpp hotspot/src/share/vm/prims/rawMonitor.hpp hotspot/src/share/vm/runtime/mutex.hpp hotspot/src/share/vm/runtime/sharedRuntime.cpp hotspot/src/share/vm/runtime/sharedRuntime.hpp hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp 
hotspot/src/share/vm/runtime/signature.hpp hotspot/src/share/vm/runtime/thread.cpp hotspot/src/share/vm/runtime/vframe.cpp hotspot/src/share/vm/runtime/vm_version.cpp hotspot/src/share/vm/utilities/debug.hpp hotspot/src/share/vm/utilities/macros.hpp hotspot/src/share/vm/utilities/xmlstream.cpp jdk/make/common/shared/Compiler-gcc.gmk make/hotspot-rules.gmk
diffstat 167 files changed, 65704 insertions(+), 6895 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env_debug.sh	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,27 @@
+export LANG=C
+export LC_ALL=C
+export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/lib/jvm/java-6-openjdk
+export ALT_BOOTDIR=/usr/lib/jvm/java-6-openjdk
+#export ALT_BOOTDIR=/home/loongson/java/j2sdk-gs2
+export ALT_BINARY_PLUGS_PATH=/opt/java/openjdk-binary-plugs
+export ALT_JDK_IMPORT_PATH=/usr/lib/jvm/java-6-openjdk
+#export ALT_SLASH_JAVA=/yangyongqiang
+export ARCH_DATA_MODEL=32
+
+export DEBUG_NAME=debug
+#The default hotspot-build is all_product. Setting DEBUG_NAME would change the target. 
+
+export BUILD_LANGTOOLS=false
+export BUILD_CORBA=false
+export BUILD_JAXP=false
+export BUILD_JAXWS=false
+export BUILD_MOTIF=false
+export BUILD_JDK=false
+export BUILD_DEPLOY=false
+
+export CLIENT_ONLY=true
+
+
+unset CLASSPATH
+unset JAVA_HOME
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env_product.sh	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,18 @@
+export LC_ALL=C
+export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/lib/jvm/java-6-openjdk
+export ALT_BOOTDIR=/usr/lib/jvm/java-6-openjdk
+export ALT_BINARY_PLUGS_PATH=/opt/java/openjdk-binary-plugs
+export ARCH_DATA_MODEL=32
+
+#The default hotspot-build is all_product. Setting DEBUG_NAME would change the target. 
+
+export BUILD_LANGTOOLS=false
+export BUILD_CORBA=false
+export BUILD_JAXP=false
+export BUILD_JAXWS=false
+export BUILD_MOTIF=false
+export BUILD_JDK=false
+export BUILD_DEPLOY=false
+
+unset CLASSPATH
+unset JAVA_HOME
--- a/hotspot/make/Makefile	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/make/Makefile	Thu Sep 30 13:48:16 2010 +0800
@@ -91,7 +91,8 @@
 all:           all_product all_fastdebug
 all_product:   product product1 productkernel docs export_product
 all_fastdebug: fastdebug fastdebug1 fastdebugkernel docs export_fastdebug
-all_debug:     jvmg jvmg1 jvmgkernel docs export_debug
+all_debug:     jvmg1 jvmgkernel docs export_debug
+#all_debug:     jvmg jvmg1 jvmgkernel docs export_debug
 all_optimized: optimized optimized1 optimizedkernel docs export_optimized
 
 # Do everything
--- a/hotspot/make/defs.make	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/make/defs.make	Thu Sep 30 13:48:16 2010 +0800
@@ -192,13 +192,16 @@
 
   # Use uname output for SRCARCH, but deal with platform differences. If ARCH
   # is not explicitly listed below, it is treated as x86. 
-  SRCARCH     = $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64,$(ARCH)))
+  #SRCARCH     = $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64,$(ARCH)))
+  SRCARCH     = $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 mips mips64,$(ARCH)))
   ARCH/       = x86
   ARCH/sparc  = sparc
   ARCH/sparc64= sparc
   ARCH/ia64   = ia64
   ARCH/amd64  = x86
   ARCH/x86_64 = x86
+  ARCH/mips   = mips
+  ARCH/mips64 = mips
 
   # BUILDARCH is usually the same as SRCARCH, except for sparcv9
   BUILDARCH = $(SRCARCH)
@@ -214,6 +217,11 @@
       BUILDARCH = sparcv9
     endif
   endif
+  ifeq ($(BUILDARCH), mips)
+    ifdef LP64
+      BUILDARCH = mips64
+    endif
+  endif
 
   # LIBARCH is 1:1 mapping from BUILDARCH
   LIBARCH         = $(LIBARCH/$(BUILDARCH))
@@ -222,8 +230,10 @@
   LIBARCH/sparc   = sparc
   LIBARCH/sparcv9 = sparcv9
   LIBARCH/ia64    = ia64
+  LIBARCH/mips    = mips
+  LIBARCH/mips64  = mips64
 
-  LP64_ARCH = sparcv9 amd64 ia64
+  LP64_ARCH = sparcv9 amd64 ia64 mips64
 endif
 
 # Required make macro settings for all platforms
--- a/hotspot/make/linux/makefiles/buildtree.make	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/make/linux/makefiles/buildtree.make	Thu Sep 30 13:48:16 2010 +0800
@@ -114,7 +114,7 @@
 BUILDTREE_MAKE	= $(GAMMADIR)/make/$(OS_FAMILY)/makefiles/buildtree.make
 
 BUILDTREE_TARGETS = Makefile flags.make flags_vm.make vm.make adlc.make jvmti.make sa.make \
-        env.sh env.csh .dbxrc test_gamma
+        env.sh env.csh .dbxrc #test_gamma
 
 BUILDTREE_VARS	= GAMMADIR=$(GAMMADIR) OS_FAMILY=$(OS_FAMILY) \
 	ARCH=$(ARCH) BUILDARCH=$(BUILDARCH) LIBARCH=$(LIBARCH) VARIANT=$(VARIANT)
--- a/hotspot/make/linux/makefiles/debug.make	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/make/linux/makefiles/debug.make	Thu Sep 30 13:48:16 2010 +0800
@@ -40,5 +40,5 @@
 
 G_SUFFIX =
 VERSION = debug
-SYSDEFS += -DASSERT -DDEBUG
+SYSDEFS += -DASSERT -DDEBUG -DLOONGSONDEBUG
 PICFLAGS = DEFAULT
--- a/hotspot/make/linux/makefiles/gcc.make	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/make/linux/makefiles/gcc.make	Thu Sep 30 13:48:16 2010 +0800
@@ -25,8 +25,8 @@
 #------------------------------------------------------------------------
 # CC, CPP & AS
 
-CPP = g++
-CC  = gcc
+CPP = g++-4.2
+CC  = gcc-4.2
 AS  = $(CC) -c
 
 # -dumpversion in gcc-2.91 shows "egcs-2.91.66". In later version, it only
@@ -83,7 +83,7 @@
 endif
 
 # Compiler warnings are treated as errors
-WARNINGS_ARE_ERRORS = -Werror
+WARNINGS_ARE_ERRORS =
 
 # Except for a few acceptable ones
 # Since GCC 4.3, -Wconversion has changed its meanings to warn these implicit
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/make/linux/makefiles/mips.make	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,44 @@
+# @(#)linux/makefiles/mips.make
+#
+# Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
+# Copyright 2007-2008 Reservoir Labs, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+#  
+#
+
+# Not included in includeDB because it has no dependencies
+Obj_Files += linux_mips.o
+
+# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized
+OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
+# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized
+OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
+# Must also specify if CPU is little endian
+CFLAGS += -DVM_LITTLE_ENDIAN
+
+CFLAGS += -DSICORTEX_ERRATA
+
+#CFLAGS += -D_LP64=1
+
+# The serviceability agent relies on the frame pointer to walk the thread stack
+CFLAGS += -fno-omit-frame-pointer
+
+OPT_CFLAGS/compactingPermGenGen.o = -O1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/make/linux/makefiles/mips64.make	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,44 @@
+# @(#)linux/makefiles/mips64.make
+#
+# Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
+# Copyright 2007-2008 Reservoir Labs, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+#  
+#
+
+# Not included in includeDB because it has no dependencies
+Obj_Files += linux_mips.o
+
+# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized
+OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
+# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized
+OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
+# Must also specify if CPU is little endian
+CFLAGS += -DVM_LITTLE_ENDIAN
+
+CFLAGS += -DSICORTEX_ERRATA
+
+CFLAGS += -D_LP64=1
+
+# The serviceability agent relies on the frame pointer to walk the thread stack
+CFLAGS += -fno-omit-frame-pointer
+
+OPT_CFLAGS/compactingPermGenGen.o = -O1
--- a/hotspot/make/linux/makefiles/vm.make	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/make/linux/makefiles/vm.make	Thu Sep 30 13:48:16 2010 +0800
@@ -221,7 +221,9 @@
 
 #----------------------------------------------------------------------
 
-build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) checkAndBuildSA
+#TODO aoqi
+#build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) checkAndBuildSA
+build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB)
 
 install: install_jvm install_jsig install_saproc
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/make/linux/platform_mips	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,15 @@
+os_family = linux
+
+arch = mips
+
+arch_model = mips
+
+os_arch = linux_mips
+
+os_arch_model = linux_mips
+
+compiler = gcc
+
+gnu_dis_arch = mips
+
+sysdefs = -DLINUX -D_GNU_SOURCE -DMIPS32
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/assembler_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,3017 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_assembler_mips.cpp.incl"
+
+int MacroAssembler::i[32] = {0,};
+float MacroAssembler::f[32] = {0.0,};
+
+void MacroAssembler::print(outputStream *s) {
+	unsigned int k;
+	for(k=0; k<sizeof(i)/sizeof(i[0]); k++) {
+		s->print_cr("i%d = 0x%.16lx", k, i[k]);
+	}
+	s->cr();
+
+	for(k=0; k<sizeof(f)/sizeof(f[0]); k++) {
+		s->print_cr("f%d = %f", k, f[k]); 
+	}
+	s->cr();
+}
+
+
+int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
+int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
+	
+void MacroAssembler::save_registers(MacroAssembler *masm) {
+#define __ masm->
+	for(int k=0; k<32; k++) {
+		__ sw (as_Register(k), A0, i_offset(k));
+	}
+	
+	for(int k=0; k<32; k++) {
+		__ swc1 (as_FloatRegister(k), A0, f_offset(k));
+	}
+#undef __
+}
+
+void MacroAssembler::restore_registers(MacroAssembler *masm) {
+#define __ masm->
+	for(int k=0; k<32; k++) {
+		__ lw (as_Register(k), A0, i_offset(k));
+	}
+		
+	for(int k=0; k<32; k++) {
+		__ lwc1 (as_FloatRegister(k), A0, f_offset(k));
+	}
+#undef __
+}
+
+
+// Implementation of AddressLiteral
+
+AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
+  _is_lval = false;
+  _target = target;
+  switch (rtype) {
+  case relocInfo::oop_type:
+    // Oops are a special case. Normally they would be their own section
+    // but in cases like icBuffer they are literals in the code stream that
+    // we don't have a section for. We use none so that we get a literal address
+    // which is always patchable.
+    break;
+  case relocInfo::external_word_type:
+    _rspec = external_word_Relocation::spec(target);
+    break;
+  case relocInfo::internal_word_type:
+    _rspec = internal_word_Relocation::spec(target);
+    break;
+  case relocInfo::opt_virtual_call_type:
+    _rspec = opt_virtual_call_Relocation::spec();
+    break;
+  case relocInfo::static_call_type:
+    _rspec = static_call_Relocation::spec();
+    break;
+  case relocInfo::runtime_call_type:
+    _rspec = runtime_call_Relocation::spec();
+    break;
+  case relocInfo::poll_type:
+  case relocInfo::poll_return_type:
+    _rspec = Relocation::spec_simple(rtype);
+    break;
+  case relocInfo::none:
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+// Implementation of Address
+
+#ifdef _LP64
+
+Address Address::make_array(ArrayAddress adr) {
+  // Not implementable on 64bit machines
+  // Should have been handled higher up the call chain.
+  ShouldNotReachHere();
+  return Address();
+}
+
+// exceedingly dangerous constructor
+Address::Address(int disp, address loc, relocInfo::relocType rtype) {
+  _base  = noreg;
+  _index = noreg;
+  _scale = no_scale;
+  _disp  = disp;
+  switch (rtype) {
+    case relocInfo::external_word_type:
+      _rspec = external_word_Relocation::spec(loc);
+      break;
+    case relocInfo::internal_word_type:
+      _rspec = internal_word_Relocation::spec(loc);
+      break;
+    case relocInfo::runtime_call_type:
+      // HMM
+      _rspec = runtime_call_Relocation::spec();
+      break;
+    case relocInfo::poll_type:
+    case relocInfo::poll_return_type:
+      _rspec = Relocation::spec_simple(rtype);
+      break;
+    case relocInfo::none:
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+#else // LP64
+
+Address Address::make_array(ArrayAddress adr) {
+  AddressLiteral base = adr.base();
+  Address index = adr.index();
+  assert(index._disp == 0, "must not have disp"); // maybe it can?
+  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
+  array._rspec = base._rspec;
+  return array;
+}
+
+// exceedingly dangerous constructor
+Address::Address(address loc, RelocationHolder spec) {
+  _base  = noreg;
+  _index = noreg;
+  _scale = no_scale;
+  _disp  = (intptr_t) loc;
+  _rspec = spec;
+}
+
+#endif // _LP64
+
+
+/*
+// Convert the raw encoding form into the form expected by the constructor for
+// Address.  An index of 4 (rsp) corresponds to having no index, so convert
+// that to noreg for the Address constructor.
+Address Address::make_raw(int base, int index, int scale, int disp) {
+  bool valid_index = index != rsp->encoding();
+  if (valid_index) {
+    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
+    return madr;
+  } else {
+    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
+    return madr;
+  }
+}
+*/
+
+// Implementation of Assembler
+const char *Assembler::ops_name[] = {
+	"special",  "regimm",   "j",      "jal",    "beq",      "bne",      "blez",   "bgtz",
+	"addi",     "addiu",    "slti",   "sltiu",  "andi",     "ori",      "xori",   "lui",
+	"cop0",     "cop1",     "cop2",   "cop3",   "beql",     "bnel",     "bleql",  "bgtzl",
+	"daddi",    "daddiu",   "ldl",    "ldr",    "",         "",         "",       "",
+	"lb",       "lh",       "lwl",    "lw",     "lbu",      "lhu",      "lwr",    "lwu",
+	"sb",       "sh",       "swl",    "sw",     "sdl",      "sdr",      "swr",    "cache",
+	"ll",       "lwc1",     "",       "",       "lld",      "ldc1",     "",       "ld",
+	"sc",       "swc1",     "",       "",       "scd",      "sdc1",     "",       "sd"
+};
+
+const char* Assembler::special_name[] = {
+	"sll",      "",         "srl",      "sra",      "sllv",     "",         "srlv",     "srav",
+	"jr",       "jalr",     "",         "",         "syscall",  "break",    "",         "sync",
+	"mfhi",     "mthi",     "mflo",     "mtlo",     "dsll",     "",         "dsrl",     "dsra",
+	"mult",     "multu",    "div",      "divu",     "dmult",    "dmultu",   "ddiv",     "ddivu",
+	"add",      "addu",     "sub",      "subu",     "and",      "or",       "xor",      "nor",
+	"",         "",         "slt",      "sltu",     "dadd",     "daddu",    "dsub",     "dsubu",
+	"tge",      "tgeu",     "tlt",      "tltu",     "teq",      "",         "tne",      "",
+	"dsll",     "",         "dsrl",     "dsra",     "dsll32",   "",         "dsrl32",   "dsra32"
+};
+
+const char* Assembler::regimm_name[] = {
+	"bltz",     "bgez",     "bltzl",    "bgezl",    "",         "",         "",         "",
+	"tgei",     "tgeiu",    "tlti",     "tltiu",    "teqi",     "",         "tnei",     "",
+	"bltzal",   "bgezal",   "bltzall",  "bgezall"
+};
+	
+const char* Assembler::float_name[] = {
+	"add",			"sub",			"mul",			"div",			"sqrt",			"abs",			"mov",			"neg",
+	"round.l",	"trunc.l",	"ceil.l",		"floor.l",	"round.w",  "trunc.w",	"ceil.w",		"floor.w"
+};
+
+//Note: despite the general name, this prints only branch/jump instructions
+void Assembler::print_instruction(int inst) {
+	const char *s;
+	switch( opcode(inst) ) {
+	default:
+		s = ops_name[opcode(inst)];
+		break;
+	case special_op:
+		s = special_name[special(inst)];
+		break;
+	case regimm_op:
+		s = regimm_name[rt(inst)];
+		break;
+	}
+
+	::tty->print("%s", s);
+}
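
For reference, a minimal standalone sketch (plain C++, independent of the Assembler class) of the field extraction these name tables are indexed by; the bit positions follow the standard MIPS32 encoding, and op_field/rt_field/funct_field are illustrative names only, not HotSpot APIs:

  #include <cstdint>
  #include <cstdio>

  // Standard MIPS32 field layout: opcode in bits 31..26, rt in bits 20..16,
  // funct in bits 5..0 (the same fields opcode(), rt() and special() extract).
  static inline uint32_t op_field(uint32_t inst)    { return (inst >> 26) & 0x3f; }
  static inline uint32_t rt_field(uint32_t inst)    { return (inst >> 16) & 0x1f; }
  static inline uint32_t funct_field(uint32_t inst) { return inst & 0x3f; }

  int main() {
    uint32_t beq  = 0x10220005;  // beq $1, $2, +5   -> opcode 4, looked up in ops_name
    uint32_t addu = 0x00851021;  // addu $2, $4, $5  -> opcode 0, funct 0x21, in special_name
    std::printf("opcode=%u rt=%u funct=%u\n",
                op_field(beq), rt_field(beq), funct_field(addu));
    return 0;
  }
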
+
+//No overflow check on the 16-bit branch offset; may need to be fixed
+int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
+	int v = (dest_pos - inst_pos - 4)>>2;
+	switch(opcode(inst)) {
+	case j_op:
+	case jal_op:
+		assert(false, "should not use j/jal here");
+		break;
+	default:
+		v = low16(v);
+		inst &= 0xffff0000;
+		break;
+	}
+
+	return inst | v;
+}
+
+int Assembler::branch_destination(int inst, int pos) {
+	int off;
+	
+	switch(opcode(inst)) {
+	case j_op:
+	case jal_op:
+		assert(false, "should not use j/jal here");
+		break;
+	default:
+		off = expand(low16(inst), 15);
+		break;
+	}
+	
+	return off ? pos + 4 + (off<<2) : 0;
+}
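
The two routines above encode and decode the 16-bit PC-relative word offset of conditional branches. A small self-contained sketch of that arithmetic; the function names here are illustrative, not part of the Assembler API:

  #include <cassert>
  #include <cstdint>

  // The offset is measured in words from the delay slot (inst_pos + 4), kept
  // in the low 16 bits of the instruction, and sign-extended when read back.
  static int encode_branch_off(int dest_pos, int inst_pos) {
    return ((dest_pos - inst_pos - 4) >> 2) & 0xffff;
  }
  static int decode_branch_dest(int low16, int inst_pos) {
    int off = (int16_t)low16;               // sign-extend 16 -> 32 bits
    return inst_pos + 4 + (off << 2);
  }

  int main() {
    int inst_pos = 0x100, dest_pos = 0x80;  // a backward branch
    int enc = encode_branch_off(dest_pos, inst_pos);
    assert(decode_branch_dest(enc, inst_pos) == dest_pos);  // round-trips
    return 0;
  }
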
+
+int AbstractAssembler::code_fill_byte() {
+	  return 0x00;                  // 0x00000000 decodes as a nop (sll zero, zero, 0) on MIPS
+}
+
+// Now the Assembler instruction (identical for 32/64 bits)
+
+void Assembler::lb(Register rt, Address src) {
+	lb(rt, src.base(), src.disp());
+}
+
+void Assembler::lbu(Register rt, Address src) {
+	lbu(rt, src.base(), src.disp());
+}
+
+void Assembler::ld(Register rt, Address src){
+	ld(rt, src.base(), src.disp());
+}
+
+void Assembler::ldl(Register rt, Address src){
+	ldl(rt, src.base(), src.disp());
+}
+
+void Assembler::ldr(Register rt, Address src){
+	ldr(rt, src.base(), src.disp());
+}
+
+void Assembler::lh(Register rt, Address src){
+	lh(rt, src.base(), src.disp());
+}
+
+void Assembler::lhu(Register rt, Address src){
+	lhu(rt, src.base(), src.disp());
+}
+
+void Assembler::ll(Register rt, Address src){
+	ll(rt, src.base(), src.disp());
+}
+
+void Assembler::lld(Register rt, Address src){
+	lld(rt, src.base(), src.disp());
+}
+
+void Assembler::lw(Register rt, Address src){
+	lw(rt, src.base(), src.disp());
+}
+void Assembler::lea(Register rt, Address src) {
+	addi(rt, src.base(), src.disp());
+}
+
+void Assembler::lwl(Register rt, Address src){
+	lwl(rt, src.base(), src.disp());
+}
+
+void Assembler::lwr(Register rt, Address src){
+	lwr(rt, src.base(), src.disp());
+}
+
+void Assembler::lwu(Register rt, Address src){
+	lwu(rt, src.base(), src.disp());
+}
+
+void Assembler::sb(Register rt, Address dst) {
+	sb(rt, dst.base(), dst.disp());
+}
+
+void Assembler::sc(Register rt, Address dst) {
+	sc(rt, dst.base(), dst.disp());
+}
+
+void Assembler::scd(Register rt, Address dst) {
+	scd(rt, dst.base(), dst.disp());
+}
+
+void Assembler::sd(Register rt, Address dst) {
+	sd(rt, dst.base(), dst.disp());
+}
+
+void Assembler::sdl(Register rt, Address dst) {
+	sdl(rt, dst.base(), dst.disp());
+}
+
+void Assembler::sdr(Register rt, Address dst) {
+	sdr(rt, dst.base(), dst.disp());
+}
+
+void Assembler::sh(Register rt, Address dst) {
+	sh(rt, dst.base(), dst.disp());
+}
+
+void Assembler::sw(Register rt, Address dst) {
+	sw(rt, dst.base(), dst.disp());
+}
+
+void Assembler::swl(Register rt, Address dst) {
+	swl(rt, dst.base(), dst.disp());
+}
+
+void Assembler::swr(Register rt, Address dst) {
+	swr(rt, dst.base(), dst.disp());
+}
+
+void Assembler::lwc1(FloatRegister rt, Address src) {
+	lwc1(rt, src.base(), src.disp());
+}
+
+void Assembler::ldc1(FloatRegister rt, Address src) {
+	ldc1(rt, src.base(), src.disp());
+}
+
+void Assembler::swc1(FloatRegister rt, Address dst) {
+	swc1(rt, dst.base(), dst.disp());
+}
+
+void Assembler::sdc1(FloatRegister rt, Address dst) {
+	sdc1(rt, dst.base(), dst.disp());
+}
+
+void Assembler::j(address entry) {
+	int dest = ((int)entry - (((int)pc() + 4) & 0xf0000000))>>2;
+	emit_long((j_op<<26) | dest); 
+	has_delay_slot(); 
+}
+
+void Assembler::jal(address entry) {
+	int dest = ((int)entry - (((int)pc() + 4) & 0xf0000000))>>2;
+	emit_long((jal_op<<26) | dest); 
+	has_delay_slot(); 
+}
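
j() and jal() above build a J-type instruction whose 26-bit index addresses words inside the 256MB region of the delay-slot PC. A hedged sketch of that target arithmetic; encode_j/decode_j_target are illustrative helpers, and the opcode value 2 matches the position of "j" in ops_name:

  #include <cassert>
  #include <cstdint>

  static uint32_t encode_j(uint32_t opcode, uint32_t pc, uint32_t target) {
    uint32_t index = (target - ((pc + 4) & 0xf0000000)) >> 2;  // word index in region
    return (opcode << 26) | (index & 0x03ffffff);
  }
  static uint32_t decode_j_target(uint32_t inst, uint32_t pc) {
    return ((pc + 4) & 0xf0000000) | ((inst & 0x03ffffff) << 2);
  }

  int main() {
    uint32_t pc = 0x10402000, target = 0x10010040;   // same 256MB region
    uint32_t inst = encode_j(2 /* j */, pc, target);
    assert(decode_j_target(inst, pc) == target);
    return 0;
  }
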
+
+
+
+
+
+
+
+
+
+// Implementation of MacroAssembler
+
+// First all the versions that have distinct versions depending on 32/64 bit
+// Unless the difference is trivial (1 line or so).
+
+//#ifndef _LP64
+
+// 32bit versions
+
+void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) {
+  addu_long(AT, base, offset);
+  ld_ptr(rt, 0, AT);
+}
+
+void MacroAssembler::st_ptr(Register rt, Register offset, Register base) {
+  addu_long(AT, base, offset);
+  st_ptr(rt, 0, AT);
+}
+
+void MacroAssembler::ld_long(Register rt, Register offset, Register base) {
+  addu_long(AT, base, offset);
+  ld_long(rt, 0, AT);
+}
+
+void MacroAssembler::st_long(Register rt, Register offset, Register base) {
+  addu_long(AT, base, offset);
+  st_long(rt, 0, AT);
+}
+
+Address MacroAssembler::as_Address(AddressLiteral adr) {
+  return Address(adr.target(), adr.rspec());
+}
+
+Address MacroAssembler::as_Address(ArrayAddress adr) {
+  return Address::make_array(adr);
+}
+
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+                                         Register obj_reg,
+                                         Register swap_reg,
+                                         Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Label& done,
+                                         Label* slow_case,
+                                         BiasedLockingCounters* counters) {
+	assert(UseBiasedLocking, "why call this otherwise?");
+	//assert(swap_reg == eax, "swap_reg must be eax for cmpxchg");
+	assert_different_registers(lock_reg, obj_reg, swap_reg);
+	bool need_tmp_reg = false;
+	if (tmp_reg == noreg) {
+		need_tmp_reg = true;
+		tmp_reg = lock_reg;
+	} else {
+		assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+	}
+	assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+	Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+	Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
+	Address saved_mark_addr(lock_reg, 0);
+
+	// Biased locking
+	// See whether the lock is currently biased toward our thread and
+	// whether the epoch is still valid
+	// Note that the runtime guarantees sufficient alignment of JavaThread
+	// pointers to allow age to be placed into low bits
+	// First check to see whether biasing is even enabled for this object
+	Label cas_label;
+	int null_check_offset = -1;
+	if (!swap_reg_contains_mark) {
+		null_check_offset = offset();
+		//   movl(swap_reg, mark_addr);
+		lw(swap_reg, mark_addr);
+	}
+/////////////////////////////////////////////	
+		//jerome_for_debug
+/*		Label ne;	
+		move(AT, 0x00000005);
+		sub(AT, AT,swap_reg);
+		bne(AT, ZERO, ne);
+		delayed()->nop();	
+		move(AT, (int)(&jerome8)); 
+		sw(swap_reg, AT, 0); 
+		bind(ne);
+*/
+//////////////////////////////////////////////	
+	
+	
+	
+	
+	if (need_tmp_reg) {
+		// pushl(tmp_reg);
+		push(tmp_reg);
+	}
+	//movl(tmp_reg, swap_reg);
+	move(tmp_reg, swap_reg);
+	//andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+	andi(tmp_reg,tmp_reg, markOopDesc::biased_lock_mask_in_place);
+	//cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
+	addi(AT, ZERO,markOopDesc::biased_lock_pattern);
+	sub(AT, AT, tmp_reg);
+	if (need_tmp_reg) {
+		// popl(tmp_reg);
+		pop(tmp_reg);
+	}
+
+	//jcc(Assembler::notEqual, cas_label);
+	bne(AT,ZERO,cas_label);
+	delayed()->nop();
+
+	
+	
+	// The bias pattern is present in the object's header. Need to check
+	// whether the bias owner and the epoch are both still current.
+	// Note that because there is no current thread register on x86 we
+	// need to store off the mark word we read out of the object to
+	// avoid reloading it and needing to recheck invariants below. This
+	// store is unfortunate but it makes the overall code shorter and
+	// simpler.
+	// movl(saved_mark_addr, swap_reg);
+/*
+	// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome1)); 
+	move(tmp_reg, 0xeeeeeeee);
+	sw(tmp_reg, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	sw(swap_reg,saved_mark_addr); 
+	if (need_tmp_reg) {
+		//pushl(tmp_reg);
+		push(tmp_reg);
+	}
+	get_thread(tmp_reg);
+	//xorl(swap_reg, tmp_reg);
+	xorr(swap_reg,swap_reg, tmp_reg);
+	if (swap_reg_contains_mark) {
+		null_check_offset = offset();
+	}
+	// movl(tmp_reg, klass_addr);
+	lw(tmp_reg,klass_addr); 
+	// xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() 
+	// + klassOopDesc::klass_part_offset_in_bytes()));
+	//xori(swap_reg, swap_reg,Address(tmp_reg, Klass::prototype_header_offset_in_bytes() 
+	//+ klassOopDesc::klass_part_offset_in_bytes()));
+
+	lw(AT, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() 
+				+ klassOopDesc::klass_part_offset_in_bytes()));
+	xorr(swap_reg,swap_reg,AT); 
+	// andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
+        move(AT, ~((int) markOopDesc::age_mask_in_place)); 
+	andr(swap_reg,swap_reg,AT);
+
+	if (need_tmp_reg) {
+		//popl(tmp_reg);
+		pop(tmp_reg);
+	}
+	if (PrintBiasedLockingStatistics) {
+		//FIXME
+		//cond_incl(ZERO, Address((int) BiasedLocking::biased_lock_entry_count_addr(), 
+		//relocInfo::none));
+	}
+	//  jcc(Assembler::equal, done);
+	//FIXME: 'equal' compared against what? There is no cmp or test here. @jerome
+	//beq(tmp_reg,ZERO, done);
+	beq(swap_reg,ZERO, done);
+	delayed()->nop();
+/*
+// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome2)); 
+	move(tmp_reg, 0xdddddddd);
+	sw(tmp_reg, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	Label try_revoke_bias;
+	Label try_rebias;
+
+	// At this point we know that the header has the bias pattern and
+	// that we are not the bias owner in the current epoch. We need to
+	// figure out more details about the state of the header in order to
+	// know what operations can be legally performed on the object's
+	// header.
+
+	// If the low three bits in the xor result aren't clear, that means
+	// the prototype header is no longer biased and we have to revoke
+	// the bias on this object.
+
+	//testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
+	//jcc(Assembler::notZero, try_revoke_bias);
+        move(AT, markOopDesc::biased_lock_mask_in_place); 	
+	andr(AT,swap_reg,AT );
+	bne(AT,ZERO,try_revoke_bias); 
+	delayed()->nop();
+/*	
+	// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome3)); 
+	move(tmp_reg, 0xcccccccc);
+	sw(tmp_reg, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	// Biasing is still enabled for this data type. See whether the
+	// epoch of the current bias is still valid, meaning that the epoch
+	// bits of the mark word are equal to the epoch bits of the
+	// prototype header. (Note that the prototype header's epoch bits
+	// only change at a safepoint.) If not, attempt to rebias the object
+	// toward the current thread. Note that we must be absolutely sure
+	// that the current epoch is invalid in order to do this because
+	// otherwise the manipulations it performs on the mark word are
+	// illegal.
+
+	// testl(swap_reg, markOopDesc::epoch_mask_in_place);
+	//jcc(Assembler::notZero, try_rebias);
+	move(AT, markOopDesc::epoch_mask_in_place);
+	andr(AT,swap_reg,AT);
+	bne(AT,ZERO,try_rebias);
+	delayed()->nop();
+/*
+	// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome4)); 
+	move(tmp_reg, 0xbbbbbbbb);
+	sw(tmp_reg, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	// The epoch of the current bias is still valid but we know nothing
+	// about the owner; it might be set or it might be clear. Try to
+	// acquire the bias of the object using an atomic operation. If this
+	// fails we will go in to the runtime to revoke the object's bias.
+	// Note that we first construct the presumed unbiased header so we
+	// don't accidentally blow away another thread's valid bias.
+
+	//movl(swap_reg, saved_mark_addr);
+	lw(swap_reg, saved_mark_addr);
+
+	//  andl(swap_reg,markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+	move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);  
+	andr(swap_reg,swap_reg,AT);
+
+	if (need_tmp_reg) {
+		// pushl(tmp_reg);
+		push(tmp_reg);
+	}
+	get_thread(tmp_reg);
+	//orl(tmp_reg, swap_reg);
+	orr(tmp_reg,tmp_reg, swap_reg);
+	//if (os::is_MP()) {
+	// lock();
+	//}    
+	//cmpxchg(tmp_reg, Address(obj_reg));
+	// What is stored in eax now? @jerome: see the function entry: swap_reg.
+	cmpxchg(tmp_reg, Address(obj_reg, 0),swap_reg);
+	if (need_tmp_reg) {
+		//popl(tmp_reg);
+		pop(tmp_reg);
+	}
+	// If the biasing toward our thread failed, this means that
+	// another thread succeeded in biasing it toward itself and we
+	// need to revoke that bias. The revocation will occur in the
+	// interpreter runtime in the slow case.
+	if (PrintBiasedLockingStatistics) {
+		//FIXME 
+		// cond_incl(ZERO, Address((int) BiasedLocking::anonymously_biased_lock_entry_count_addr(), relocInfo::none));
+	}
+	if (slow_case != NULL) {
+		//jcc(Assembler::notZero, *slow_case);
+		beq(AT,ZERO, *slow_case);
+		delayed()->nop();
+	}
+	//jmp(done);
+/*	
+	// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome5)); 
+	move(tmp_reg, 0xaaaaaaaa);
+	sw(ZERO, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	b(done);
+	delayed()->nop();
+
+	bind(try_rebias);
+	// At this point we know the epoch has expired, meaning that the
+	// current "bias owner", if any, is actually invalid. Under these
+	// circumstances _only_, we are allowed to use the current header's
+	// value as the comparison value when doing the cas to acquire the
+	// bias in the current epoch. In other words, we allow transfer of
+	// the bias from one thread to another directly in this situation.
+	//
+	// FIXME: due to a lack of registers we currently blow away the age
+	// bits in this situation. Should attempt to preserve them.
+	if (need_tmp_reg) {
+		// pushl(tmp_reg);
+		push(tmp_reg);
+	}
+/*
+	// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome6)); 
+	move(tmp_reg, 0x99999999);
+	sw(tmp_reg, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	get_thread(tmp_reg);
+	//movl(swap_reg, klass_addr);
+	lw(swap_reg, klass_addr);
+	// orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() 
+	// + klassOopDesc::klass_part_offset_in_bytes()));
+	lw(AT,Address(swap_reg, Klass::prototype_header_offset_in_bytes() 
+				+ klassOopDesc::klass_part_offset_in_bytes()));
+	orr(tmp_reg,tmp_reg,AT);  
+	// movl(swap_reg, saved_mark_addr);
+	lw(swap_reg, saved_mark_addr);
+
+	// if (os::is_MP()) {
+	//  lock();
+	//}    
+	// cmpxchg(tmp_reg, Address(obj_reg));
+	cmpxchg(tmp_reg, Address(obj_reg, 0),swap_reg);
+	if (need_tmp_reg) {
+		//    popl(tmp_reg);
+		pop(tmp_reg);
+	}
+	// If the biasing toward our thread failed, then another thread
+	// succeeded in biasing it toward itself and we need to revoke that
+	// bias. The revocation will occur in the runtime in the slow case.
+	if (PrintBiasedLockingStatistics) {
+		//FIXME 
+		//cond_incl(ZERO, Address((int) BiasedLocking::rebiased_lock_entry_count_addr(), 
+		//relocInfo::none));
+	}
+	if (slow_case != NULL) {
+		//jcc(Assembler::notZero, *slow_case);
+		beq(AT,ZERO, *slow_case);
+		delayed()->nop();
+	}
+	//jmp(done);
+
+	b(done);
+	delayed()->nop();
+	bind(try_revoke_bias);
+	// The prototype mark in the klass doesn't have the bias bit set any
+	// more, indicating that objects of this data type are not supposed
+	// to be biased any more. We are going to try to reset the mark of
+	// this object to the prototype value and fall through to the
+	// CAS-based locking scheme. Note that if our CAS fails, it means
+	// that another thread raced us for the privilege of revoking the
+	// bias of this particular object, so it's okay to continue in the
+	// normal locking code.
+	//
+	// FIXME: due to a lack of registers we currently blow away the age
+	// bits in this situation. Should attempt to preserve them.
+	// movl(swap_reg, saved_mark_addr);
+	lw(swap_reg, saved_mark_addr);
+
+	if (need_tmp_reg) {
+		//pushl(tmp_reg);
+		push(tmp_reg);
+	}
+/*	
+	// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome7)); 
+	move(tmp_reg, 0x88888888);
+	sw(tmp_reg, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	//movl(tmp_reg, klass_addr);
+	lw(tmp_reg, klass_addr);
+	//movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+	lw(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() 
+				+ klassOopDesc::klass_part_offset_in_bytes()));
+	//if (os::is_MP()) {
+	// lock();
+	//}    
+	//cmpxchg(tmp_reg, Address(obj_reg));
+	cmpxchg(tmp_reg, Address(obj_reg, 0),swap_reg);
+	if (need_tmp_reg) {
+		//popl(tmp_reg);
+		pop(tmp_reg);
+	}
+	// Fall through to the normal CAS-based lock, because no matter what
+	// the result of the above CAS, some thread must have succeeded in
+	// removing the bias bit from the object's header.
+	if (PrintBiasedLockingStatistics) {
+		//FIXME
+		//cond_incl(ZERO, Address((int) BiasedLocking::revoked_lock_entry_count_addr(), relocInfo::none));
+	}
+
+	bind(cas_label);
+/*// jerome_for_debug
+	push(tmp_reg);
+	move(AT, (int)(&jerome8)); 
+	move(tmp_reg, 0x77777777);
+	sw(tmp_reg, AT, 0);  
+	pop(tmp_reg);	
+*/	
+	return null_check_offset;
+}
+
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  // Check for biased locking unlock case, which is a no-op
+	// Note: we do not have to check the thread ID for two reasons.
+	// First, the interpreter checks for IllegalMonitorStateException at
+	// a higher level. Second, if the bias was revoked while we held the
+	// lock, the object could not be rebiased toward another thread, so
+	// the bias bit would be clear.
+	//movl(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+	lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+	//andl(temp_reg, markOopDesc::biased_lock_mask_in_place);
+	andi(temp_reg, temp_reg,markOopDesc::biased_lock_mask_in_place);
+	// cmpl(temp_reg, markOopDesc::biased_lock_pattern);
+	addi(AT,ZERO,markOopDesc::biased_lock_pattern);
+	//jcc(Assembler::equal, done);
+
+	beq(AT,temp_reg,done);
+	delayed()->nop();
+}
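
Both routines above boil down to testing the low bits of the object's mark word. A plain C++ illustration of that test, with local constants mirroring markOopDesc::biased_lock_mask_in_place (0x7) and biased_lock_pattern (0x5); this is a sketch, not HotSpot code:

  #include <cstdint>

  static const uintptr_t kBiasedLockMask    = 0x7;  // low three lock/bias bits
  static const uintptr_t kBiasedLockPattern = 0x5;  // "biased" header pattern

  static bool has_bias_pattern(uintptr_t mark_word) {
    return (mark_word & kBiasedLockMask) == kBiasedLockPattern;
  }

  int main() {
    uintptr_t biased_mark  = (0x1234u << 3) | kBiasedLockPattern;  // thread/epoch bits above
    uintptr_t neutral_mark = 0x1;                                  // unlocked, unbiased
    return (has_bias_pattern(biased_mark) && !has_bias_pattern(neutral_mark)) ? 0 : 1;
  }
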
+
+// NOTE: we don't increment the SP after the call as the x86 version does; this may be a problem. FIXME.
+// by yjl 6/27/2005
+// The stack pointer adjustment is needed; see InterpreterMacroAssembler::super_call_VM_leaf.
+// by yjl 7/11/2005
+// This method handles the stack adjustment itself, so callers no longer need to reserve stack space for the arguments.
+// by yjl 8/1/2005
+void MacroAssembler::call_VM_leaf_base(address entry_point,
+                                       int number_of_arguments) {
+  //call(RuntimeAddress(entry_point));
+  //increment(rsp, number_of_arguments * wordSize);
+#ifndef OPT_THREAD
+  Register java_thread = T8;
+  get_thread(java_thread);
+#else
+  Register java_thread = TREG;
+#endif
+
+// save stack pointer
+  assert(number_of_arguments <= 4, "just check");
+	sw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
+
+	if (number_of_arguments)
+	  addi(SP, SP, - number_of_arguments * wordSize);
+	move(AT, -8); 
+	andr(SP, SP, AT); 
+												  
+	call(entry_point, relocInfo::runtime_call_type);
+	delayed()->nop();
+														  
+#ifndef OPT_THREAD
+	get_thread(java_thread);
+#endif
+	lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
+}
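
The argument-space reservation and the move(AT, -8)/andr(SP, SP, AT) pair above just round the stack pointer down to an 8-byte boundary after reserving one word per argument. A sketch of the same arithmetic, assuming the 32-bit wordSize of 4; adjust_sp is an illustrative name:

  #include <cassert>
  #include <cstdint>

  static uintptr_t adjust_sp(uintptr_t sp, int number_of_arguments, int word_size = 4) {
    if (number_of_arguments)
      sp -= number_of_arguments * word_size;  // reserve outgoing argument slots
    return sp & ~(uintptr_t)7;                // same effect as and-ing with -8
  }

  int main() {
    assert(adjust_sp(0x7fff0014, 3) == 0x7fff0008);  // 12 bytes reserved, already aligned
    assert(adjust_sp(0x7fff0015, 0) == 0x7fff0010);  // no arguments, only re-aligned
    return 0;
  }
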
+
+
+// FIXME: not sure which register to use for jr; T9 is used here.
+// by yjl 6/27/2005
+void MacroAssembler::jmp(address entry) {
+	//if (fit_in_jal((entry - pc() - 4))/4) {
+	//	j(entry);
+	//} else {
+		move(T9, (int)entry);
+		jr(T9);
+	//}
+}
+
+// FIXME: not sure which register to use for jr; T9 is used here.
+// by yjl 6/27/2005
+void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
+	switch (rtype) {
+		case relocInfo::runtime_call_type:
+		case relocInfo::none:
+			jmp(entry);
+			break;
+		default:
+			{
+			InstructionMark im(this);
+			relocate(rtype);
+			//move(T9, (int)entry);
+			lui(T9, Assembler::split_high((int)entry));
+			addiu(T9, T9, Assembler::split_low((int)entry));
+			jr(T9);
+			}
+			break;
+	}
+}
+
+void MacroAssembler::call(address entry) {
+	// C/C++ code assumes T9 holds its own entry point, so we always move the entry address into T9.
+	// There may be a more graceful way to handle this. FIXME
+	// by yjl 6/27/2005
+	move(T9, (int)entry);
+	jalr();
+}
+
+void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
+	switch (rtype) {
+	case relocInfo::runtime_call_type:
+	case relocInfo::none:
+	//	call(entry);
+		move(T9, (int)entry);
+		jalr();	
+		break;
+	default:
+		{
+			InstructionMark im(this);
+			relocate(rtype);
+			//move(T9, (int)entry);
+			lui(T9, Assembler::split_high((int)entry));
+			addiu(T9, T9, Assembler::split_low((int)entry));
+			jalr();
+		}
+		break;
+	}
+}
+
+void MacroAssembler::call(address entry, RelocationHolder& rh)
+{
+	switch (rh.type()) {
+	case relocInfo::runtime_call_type:
+	case relocInfo::none:
+		call(entry);
+		break;
+	default:
+		{
+			InstructionMark im(this);
+			relocate(rh);
+			//move(T9, (int)entry);
+			lui(T9, Assembler::split_high((int)entry));
+			addiu(T9, T9, Assembler::split_low((int)entry));
+			jalr();
+		}
+		break;
+	}
+}
+
+void MacroAssembler::c2bool(Register r) {
+  Label L;
+  Assembler::beq(r, ZERO, L);
+  delayed()->nop();
+  move(r, 1);
+  bind(L);
+}
+
+static void pass_arg0(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) {
+  masm->push(arg);
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
+  // In order to get locks to work, we need to fake an in_VM state
+  JavaThread* thread = JavaThread::current();
+  JavaThreadState saved_state = thread->thread_state();
+  thread->set_thread_state(_thread_in_vm);
+  if (ShowMessageBoxOnError) {
+    JavaThread* thread = JavaThread::current();
+    JavaThreadState saved_state = thread->thread_state();
+    thread->set_thread_state(_thread_in_vm);
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      ttyLocker ttyl;
+      BytecodeCounter::print();
+    }
+    // To see where a verify_oop failed, get $ebx+40/X for this frame.
+    // This is the value of eip which points to where verify_oop will return.
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+      ttyLocker ttyl;
+      tty->print_cr("eip = 0x%08x", eip);
+#ifndef PRODUCT
+      tty->cr();
+      findpc(eip);
+      tty->cr();
+#endif
+      tty->print_cr("rax, = 0x%08x", rax);
+      tty->print_cr("rbx, = 0x%08x", rbx);
+      tty->print_cr("rcx = 0x%08x", rcx);
+      tty->print_cr("rdx = 0x%08x", rdx);
+      tty->print_cr("rdi = 0x%08x", rdi);
+      tty->print_cr("rsi = 0x%08x", rsi);
+      tty->print_cr("rbp, = 0x%08x", rbp);
+      tty->print_cr("rsp = 0x%08x", rsp);
+      BREAKPOINT;
+    }
+  } else {
+    ttyLocker ttyl;
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+    assert(false, "DEBUG MESSAGE");
+  }
+  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
+}
+
+void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
+	if ( ShowMessageBoxOnError ) {
+		JavaThreadState saved_state = JavaThread::current()->thread_state();
+		JavaThread::current()->set_thread_state(_thread_in_vm);
+		{
+			// In order to get locks to work, we need to fake an in_VM state
+			ttyLocker ttyl;
+			::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
+			if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+				BytecodeCounter::print();
+			}
+																						
+//			if (os::message_box(msg, "Execution stopped, print registers?"))
+//				regs->print(::tty);
+		}
+		ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
+	}
+	else
+		::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+}
+
+
+void MacroAssembler::stop(const char* msg) {
+	move(A0, (int)msg);
+	//reserve space for the argument. added by yjl 7/10/2005
+	addiu(SP, SP, - 1 * wordSize);
+	call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
+	delayed()->nop();
+	//restore space for argument
+	addiu(SP, SP, 1 * wordSize);
+	brk(17);
+}
+
+void MacroAssembler::warn(const char* msg) {
+/*
+	push_CPU_state();
+
+  ExternalAddress message((address) msg);
+  // push address of message
+  pushptr(message.addr());
+
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
+  addl(rsp, wordSize);       // discard argument
+  pop_CPU_state();
+*/
+	
+	save_registers(this);
+	sw(A0, SP, -1 * wordSize);
+	move(A0, (int)msg);
+	addi(SP, SP, -1 * wordSize);
+	call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
+	delayed()->nop();
+	addi(SP, SP, 1 * wordSize);
+	lw(A0, SP, -1 * wordSize);
+	restore_registers(this);
+}
+
+void MacroAssembler::increment(Register reg, int imm) {
+	if (!imm) return;
+	if (is_simm16(imm)) {
+		addiu(reg, reg, imm);
+	} else {
+		move(AT, imm);
+		addu(reg, reg, AT);
+	}
+}
+
+void MacroAssembler::decrement(Register reg, int imm) {
+	increment(reg, -imm);
+}
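
increment() above chooses between a single addiu, when the immediate fits a signed 16-bit field, and materializing the value in a scratch register followed by addu. A tiny sketch of that range test; fits_simm16 is an illustrative stand-in for is_simm16:

  static bool fits_simm16(int imm) {
    return imm >= -32768 && imm <= 32767;   // signed 16-bit immediate range
  }

  int main() {
    // e.g. increment(reg, 8) emits one addiu; increment(reg, 0x12345) needs move + addu
    return (fits_simm16(8) && !fits_simm16(0x12345)) ? 0 : 1;
  }
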
+
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             bool check_exceptions) {
+	call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             bool check_exceptions) {
+  /*
+	Label C, E;
+  call(C, relocInfo::none);
+  jmp(E);
+
+  bind(C);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+  ret(0);
+
+  bind(E);
+	*/
+	if (arg_1!=A1) 
+		move(A1, arg_1);
+	call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+	if (arg_1!=A1) 
+		move(A1, arg_1);
+	if (arg_2!=A2) 
+		move(A2, arg_2); 
+	assert(arg_2 != A1, "smashed argument");
+	call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+	if (arg_1!=A1) 
+		move(A1, arg_1);
+	if (arg_2!=A2) 
+		move(A2, arg_2); 
+	assert(arg_2 != A1, "smashed argument");
+	if (arg_3!=A3) 
+		move(A3, arg_3); 
+	assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
+	call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             int number_of_arguments,
+                             bool check_exceptions) {
+  //Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
+  //call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+	call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             bool check_exceptions) {
+	if (arg_1!=A1) 
+		move(A1, arg_1);
+	call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             bool check_exceptions) {
+	if (arg_1!=A1) 
+		move(A1, arg_1);
+	if (arg_2!=A2) 
+		move(A2, arg_2); //assert(arg_2 != O1, "smashed argument");
+	call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+void MacroAssembler::call_VM(Register oop_result,
+                             Register last_java_sp,
+                             address entry_point,
+                             Register arg_1,
+                             Register arg_2,
+                             Register arg_3,
+                             bool check_exceptions) {
+	if (arg_1!=A1) 
+		move(A1, arg_1);
+	if (arg_2!=A2) 
+		move(A2, arg_2); 
+	assert(arg_2 != A1,                "smashed argument");
+	if (arg_3!=A3) 
+		move(A3, arg_3); 
+	assert(arg_3 != A1 && arg_3 != A2, "smashed argument");
+	call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+                                  Register java_thread,
+                                  Register last_java_sp,
+                                  address  entry_point,
+                                  int      number_of_arguments,
+                                  bool     check_exceptions) {
+
+	address before_call_pc;
+	// determine java_thread register
+	if (!java_thread->is_valid()) {
+#ifndef OPT_THREAD
+		java_thread = T2;
+		get_thread(java_thread);
+#else
+		java_thread = TREG;
+#endif
+	}
+	// determine last_java_sp register
+	if (!last_java_sp->is_valid()) {
+		last_java_sp = SP;
+	}
+	// debugging support
+	assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
+	assert(number_of_arguments <= 4   , "cannot have more than 4 arguments");
+	assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
+	assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
+	
+	assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp");
+	
+	// set last Java frame before call
+	before_call_pc = (address)pc();
+	set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc);
+	
+	// do the call
+	move(A0, java_thread);
+	call(entry_point, relocInfo::runtime_call_type);
+	delayed()->nop();
+	
+	// restore the thread (cannot use the pushed argument since arguments
+	// may be overwritten by C code generated by an optimizing compiler);
+	// however can use the register value directly if it is callee saved.
+#ifndef OPT_THREAD
+	if (java_thread >=S0 && java_thread <=S7) {
+#ifdef ASSERT
+		{ Label L;
+			get_thread(AT);
+			beq(java_thread, AT, L);
+			delayed()->nop();
+			stop("MacroAssembler::call_VM_base: java_thread not callee saved?");
+			bind(L);
+		}
+#endif
+	} else {
+		get_thread(java_thread);
+	}
+#endif
+
+	// discard thread and arguments
+	lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); 
+	// reset last Java frame
+	reset_last_Java_frame(java_thread, false, true);
+
+	check_and_handle_popframe(java_thread);
+        check_and_handle_earlyret(java_thread);
+	if (check_exceptions) {
+		// check for pending exceptions (java_thread is set upon return)
+		Label L;
+		lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
+		beq(AT, ZERO, L);
+		delayed()->nop();
+		move(AT, (int)before_call_pc);
+		push(AT);
+		jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
+		delayed()->nop();
+		bind(L);
+	}
+
+	// get oop result if there is one and reset the value in the thread
+	if (oop_result->is_valid()) {
+		lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset()));
+		sw(ZERO, java_thread, in_bytes(JavaThread::vm_result_offset()));
+		verify_oop(oop_result);
+	}
+}
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+
+	move(V0, SP);
+	//we also reserve space for java_thread here
+	addi(SP, SP, (1 + number_of_arguments) * (- wordSize));
+	move(AT, 0xfffffff8);
+	andr(SP, SP, AT);
+	call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions);
+
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
+	call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
+  //pass_arg0(this, arg_0);
+  //call_VM_leaf(entry_point, 1);
+	if (arg_0!=A0) 
+		move(A0, arg_0);
+	call_VM_leaf(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+/*
+  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  call_VM_leaf(entry_point, 2);
+*/
+	if (arg_0 != A0) 
+		move(A0, arg_0);
+	if (arg_1 != A1) 
+		move(A1, arg_1); 
+	assert(arg_1 != A0, "smashed argument");
+	call_VM_leaf(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+	if (arg_0 != A0) 
+			move(A0, arg_0);
+	if (arg_1 != A1) 
+			move(A1, arg_1); 
+	assert(arg_1 != A0, "smashed argument");
+	if (arg_2 != A2) 
+			move(A2, arg_2); 
+	assert(arg_2 != A0 && arg_2 != A1, "smashed argument");
+	call_VM_leaf(entry_point, 3);
+}
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
+}
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) {
+}
+
+void MacroAssembler::null_check(Register reg, int offset) {
+  if (needs_explicit_null_check(offset)) {
+    // provoke OS NULL exception if reg = NULL by
+    // accessing M[reg] w/o changing any (non-CC) registers
+    // NOTE: a plain load is enough here to provoke a SIGSEGV
+    lw(AT, reg, 0);
+		nop();	
+		nop();
+		nop();
+    // Note: the x86 version suggests testl(rax, Address(reg, 0)) as shorter code;
+    //       there is no direct MIPS equivalent, the load above suffices.
+  } else {
+    // nothing to do, (later) access of M[reg + offset]
+    // will provoke OS NULL exception if reg = NULL
+  }
+}
+
+void MacroAssembler::enter() {
+  push2(RA, FP);
+  move(FP, SP);
+}
+ 
+void MacroAssembler::leave() {
+  //move(SP, FP);
+  //pop2(FP, RA);
+  addi(SP, FP, 2 * wordSize);
+  lw(RA, SP, - 1 * wordSize);
+  lw(FP, SP, - 2 * wordSize);
+}
+/*
+void MacroAssembler::os_breakpoint() {
+  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
+  // (e.g., MSVC can't call ps() otherwise)
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
+}
+*/
+void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
+  // determine java_thread register
+  if (!java_thread->is_valid()) {
+#ifndef OPT_THREAD
+    java_thread = T1;
+    get_thread(java_thread);
+#else
+    java_thread = TREG;
+#endif
+  }
+  // we must set sp to zero to clear frame
+  sw(ZERO, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
+  // must clear fp, so that compiled frames are not confused; it is possible
+  // that we need it only for debugging
+  if(clear_fp)	
+    sw(ZERO, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
+
+  if (clear_pc)
+    sw(ZERO, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
+}
+
+// Write serialization page so VM thread can do a pseudo remote membar.
+// We use the current thread pointer to calculate a thread specific
+// offset to write to within the page. This minimizes bus traffic
+// due to cache line collision.
+void MacroAssembler::serialize_memory(Register thread, Register tmp) {
+/*  movl(tmp, thread);
+  shrl(tmp, os::get_serialize_page_shift_count());
+  andl(tmp, (os::vm_page_size() - sizeof(int)));
+
+  Address index(noreg, tmp, Address::times_1);
+  ExternalAddress page(os::get_memory_serialize_page());
+
+  movptr(ArrayAddress(page, index), tmp);
+*/
+  move(tmp, thread);
+  srl(tmp, tmp,os::get_serialize_page_shift_count());
+  move(AT, (os::vm_page_size() - sizeof(int))); 
+  andr(tmp, tmp,AT);
+
+	sw(tmp,Address(tmp, (int)os::get_memory_serialize_page()));
+}
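+// [Illustrative sketch, not generated code] The store above is roughly the
+// following C, assuming serialize_page is the page returned by
+// os::get_memory_serialize_page():
+//
+//   offset = (thread >> serialize_page_shift) & (page_size - sizeof(int));
+//   *(int*)(serialize_page + offset) = offset;   // thread-specific slot, fewer cache-line collisions
+//
+// Note that tmp is reused both as the offset and as the value stored.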
+
+// Calls to C land
+//
+// When entering C land, the fp & sp of the last Java frame have to be recorded
+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
+// has to be reset to 0. This is required to allow proper stack traversal.
+void MacroAssembler::set_last_Java_frame(Register java_thread,
+                                         Register last_java_sp,
+                                         Register last_java_fp,
+                                         address  last_java_pc) {
+  // determine java_thread register
+  if (!java_thread->is_valid()) {
+#ifndef OPT_THREAD
+    java_thread = T2;
+    get_thread(java_thread);
+#else
+    java_thread = TREG;
+#endif
+  }
+  // determine last_java_sp register
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = SP;
+  }
+
+  // last_java_fp is optional
+
+  if (last_java_fp->is_valid()) {
+    sw(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
+  }
+
+  // last_java_pc is optional
+
+  if (last_java_pc != NULL) {
+    relocate(relocInfo::internal_pc_type);
+    lui(AT, split_high((int)last_java_pc));
+    addiu(AT, AT, split_low((int)last_java_pc));
+    sw(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
+  }
+  sw(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
+}
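+// [Illustrative note] Conceptually this publishes the caller's Java frame to the
+// thread so stack walkers can start from it (field names are approximate, not
+// exact VM symbols):
+//
+//   thread->last_Java_fp = fp;   // optional
+//   thread->last_Java_pc = pc;   // optional
+//   thread->last_Java_sp = sp;   // written last: a non-zero sp marks the anchor as set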
+//////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+/*
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+#ifndef _LP64
+                                          Register thread,
+#endif
+                                          Register tmp,
+                                          Register tmp2,
+                                          bool tosca_live) {
+  LP64_ONLY(Register thread = r15_thread;)
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+
+  Label done;
+  Label runtime;
+
+  // if (!marking_in_progress) goto done;
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    cmpb(in_progress, 0);
+  }
+  jcc(Assembler::equal, done);
+
+  // if (x.f == NULL) goto done;
+  cmpptr(Address(obj, 0), NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // Can we store original value in the thread's buffer?
+
+  LP64_ONLY(movslq(tmp, index);)
+  movptr(tmp2, Address(obj, 0));
+#ifdef _LP64
+  cmpq(tmp, 0);
+#else
+  cmpl(index, 0);
+#endif
+  jcc(Assembler::equal, runtime);
+#ifdef _LP64
+  subq(tmp, wordSize);
+  movl(index, tmp);
+  addq(tmp, buffer);
+#else
+  subl(index, wordSize);
+  movl(tmp, buffer);
+  addl(tmp, index);
+#endif
+  movptr(Address(tmp, 0), tmp2);
+  jmp(done);
+  bind(runtime);
+  // save the live input values
+  if(tosca_live) push(rax);
+  push(obj);
+#ifdef _LP64
+  movq(c_rarg0, Address(obj, 0));
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
+  pop(thread);
+#endif
+  pop(obj);
+  if(tosca_live) pop(rax);
+  bind(done);
+
+}
+
+void MacroAssembler::g1_write_barrier_post(Register store_addr,
+                                           Register new_val,
+#ifndef _LP64
+                                           Register thread,
+#endif
+                                           Register tmp,
+                                           Register tmp2) {
+
+  LP64_ONLY(Register thread = r15_thread;)
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  movptr(tmp, store_addr);
+  xorptr(tmp, new_val);
+  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
+  jcc(Assembler::equal, done);
+
+  // crosses regions, storing NULL?
+
+  cmpptr(new_val, (int32_t) NULL_WORD);
+  jcc(Assembler::equal, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  ExternalAddress cardtable((address) ct->byte_map_base);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+#ifdef _LP64
+  const Register card_addr = tmp;
+
+  movq(card_addr, store_addr);
+  shrq(card_addr, CardTableModRefBS::card_shift);
+
+  lea(tmp2, cardtable);
+
+  // get the address of the card
+  addq(card_addr, tmp2);
+#else
+  const Register card_index = tmp;
+
+  movl(card_index, store_addr);
+  shrl(card_index, CardTableModRefBS::card_shift);
+
+  Address index(noreg, card_index, Address::times_1);
+  const Register card_addr = tmp;
+  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
+#endif
+  cmpb(Address(card_addr, 0), 0);
+  jcc(Assembler::equal, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  movb(Address(card_addr, 0), 0);
+
+  cmpl(queue_index, 0);
+  jcc(Assembler::equal, runtime);
+  subl(queue_index, wordSize);
+  movptr(tmp2, buffer);
+#ifdef _LP64
+  movslq(rscratch1, queue_index);
+  addq(tmp2, rscratch1);
+  movq(Address(tmp2, 0), card_addr);
+#else
+  addl(tmp2, queue_index);
+  movl(Address(tmp2, 0), card_index);
+#endif
+  jmp(done);
+
+  bind(runtime);
+  // save the live input values
+  push(store_addr);
+  push(new_val);
+#ifdef _LP64
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+  push(thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  pop(thread);
+#endif
+  pop(new_val);
+  pop(store_addr);
+
+  bind(done);
+
+}
+*/
+#endif // SERIALGC
+//////////////////////////////////////////////////////////////////////////////////
+
+
+void MacroAssembler::store_check(Register obj) {
+  // Does a store check for the oop in register obj. The content of
+  // register obj is destroyed afterwards.
+  store_check_part_1(obj);
+  store_check_part_2(obj);
+}
+
+void MacroAssembler::store_check(Register obj, Address dst) {
+  store_check(obj);
+}
+
+
+// split the store check operation so that other instructions can be scheduled inbetween
+void MacroAssembler::store_check_part_1(Register obj) {
+	BarrierSet* bs = Universe::heap()->barrier_set();
+	assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+	shr(obj, CardTableModRefBS::card_shift);
+}
+
+void MacroAssembler::store_check_part_2(Register obj) {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+	move(AT, (int)ct->byte_map_base);
+	add(AT, AT, obj);
+	sb(ZERO, AT, 0);
+	/*
+  // The calculation for byte_map_base is as follows:
+  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
+  // So this essentially converts an address to a displacement and
+  // it will never need to be relocated. On 64bit however the value may be too
+  // large for a 32bit displacement
+
+  intptr_t disp = (intptr_t) ct->byte_map_base;
+  if (is_simm32(disp)) {
+    Address cardtable(noreg, obj, Address::times_1, disp);
+    movb(cardtable, 0);
+  } else {
+    // By doing it as an ExternalAddress disp could be converted to a rip-relative
+    // displacement and done in a single instruction given favorable mapping and
+    // a smarter version of as_Address. Worst case it is two instructions which
+    // is no worse off then loading disp into a register and doing as a simple
+    // Address() as above.
+    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
+    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
+    // in some cases we'll get a single instruction version.
+
+    ExternalAddress cardtable((address)disp);
+    Address index(noreg, obj, Address::times_1);
+    movb(as_Address(ArrayAddress(cardtable, index)), 0);
+  }
+	*/
+}
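+// [Illustrative sketch, not generated code] Together the two parts implement the
+// card-table post-barrier; in C-like pseudocode (descriptive names only, 0 being
+// the dirty value stored above):
+//
+//   card_index = (uintptr_t)obj >> CardTableModRefBS::card_shift;   // part 1 (destroys obj)
+//   ct->byte_map_base[card_index] = 0;                              // part 2: dirty the card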
+/*
+void MacroAssembler::subptr(Register dst, int32_t imm32) {
+  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
+}
+
+void MacroAssembler::subptr(Register dst, Register src) {
+  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
+}
+
+void MacroAssembler::test32(Register src1, AddressLiteral src2) {
+  // src2 must be rval
+
+  if (reachable(src2)) {
+    testl(src1, as_Address(src2));
+  } else {
+    lea(rscratch1, src2);
+    testl(src1, Address(rscratch1, 0));
+  }
+}
+
+// C++ bool manipulation
+void MacroAssembler::testbool(Register dst) {
+  if(sizeof(bool) == 1)
+    testb(dst, 0xff);
+  else if(sizeof(bool) == 2) {
+    // testw implementation needed for two byte bools
+    ShouldNotReachHere();
+  } else if(sizeof(bool) == 4)
+    testl(dst, dst);
+  else
+    // unsupported
+    ShouldNotReachHere();
+}
+
+void MacroAssembler::testptr(Register dst, Register src) {
+  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
+}
+
+
+*/
+
+// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
+void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
+		                                   Register t1, Register t2, Label& slow_case) {
+	assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);
+
+	Register end = t2;
+	Register thread = t1;
+	verify_tlab(t1, t2);		//blows t1&t2
+
+	get_thread(thread);
+	lw(obj, thread, in_bytes(JavaThread::tlab_top_offset()));
+
+	if (var_size_in_bytes == NOREG) {
+		// I don't think we need to move con_size_in_bytes to a register first.
+		// by yjl 8/17/2005
+		assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
+		addi(end, obj, con_size_in_bytes);
+	} else {
+		add(end, obj, var_size_in_bytes);
+	}
+
+	lw(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
+	sltu(AT, AT, end);
+	bne(AT, ZERO, slow_case);
+	delayed()->nop();
+
+
+	// update the tlab top pointer
+	sw(end, thread, in_bytes(JavaThread::tlab_top_offset()));
+
+	// recover var_size_in_bytes if necessary
+	/*if (var_size_in_bytes == end) {
+		sub(var_size_in_bytes, end, obj);
+	}*/
+        
+	verify_tlab(t1, t2);
+}
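+// [Illustrative sketch, not generated code] tlab_allocate is plain bump-pointer
+// allocation in the thread-local buffer, roughly:
+//
+//   obj = thread->tlab_top;
+//   end = obj + size;                        // size from con_size_in_bytes or var_size_in_bytes
+//   if (end > thread->tlab_end) goto slow_case;
+//   thread->tlab_top = end;                  // no atomics needed: the TLAB is thread-private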
+
+// Defines obj, preserves var_size_in_bytes
+void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
+		Register t1, Register t2, Label& slow_case) {
+  assert_different_registers(obj, var_size_in_bytes, t1, AT);
+  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
+    // No allocation in the shared eden.
+    b(slow_case);
+    delayed()->nop();
+  } else {
+
+    Address heap_top(t1, Assembler::split_low((int)Universe::heap()->top_addr()));
+    lui(t1, split_high((int)Universe::heap()->top_addr()));
+    lw(obj, heap_top);
+
+    Register end = t2;
+    Label retry;
+    
+    bind(retry);
+    if (var_size_in_bytes == NOREG) {
+    	// I don't think we need to move con_size_in_bytes to a register first.
+    	// by yjl 8/17/2005
+      assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
+      addi(end, obj, con_size_in_bytes);
+    } else {
+      add(end, obj, var_size_in_bytes);
+    }
+    // if end < obj then we wrapped around => object too long => slow case
+    sltu(AT, end, obj);
+    bne(AT, ZERO, slow_case);
+    delayed()->nop();
+    
+    lui(AT, split_high((int)Universe::heap()->end_addr()));
+    lw(AT, AT, split_low((int)Universe::heap()->end_addr()));
+    sltu(AT, AT, end);
+    bne(AT, ZERO, slow_case);
+    delayed()->nop();
+    // Compare obj with the current top addr, and if still equal, store the new top
+    // (end) at the top addr pointer. On this port cmpxchg leaves AT non-zero on
+    // success and zero on failure; the LL/SC pair provides the atomicity.
+    if (os::is_MP()) {
+    	///lock();
+    }
+    
+    // if someone beat us on the allocation, try again, otherwise continue
+    cmpxchg(end, heap_top, obj);
+    beq(AT, ZERO, retry);    //by yyq
+    delayed()->nop();
+
+  }
+}
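+// [Illustrative sketch, not generated code] eden_allocate is the usual CAS retry
+// loop on the shared heap top, roughly:
+//
+//   obj = *heap_top;
+//   for (;;) {
+//     end = obj + size;
+//     if (end < obj || end > *heap_end) goto slow_case;      // wrap-around or heap exhausted
+//     if (compare_and_swap(heap_top, /*expected*/obj, /*new*/end)) break;
+//     // cmpxchg failed: obj now holds the refreshed top, retry
+//   }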
+
+void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
+	Register top = T0;
+	Register t1  = T1;
+	Register t2  = T5;
+	Register t3  = T6;
+	Register thread_reg = T3;
+	Label do_refill, discard_tlab;
+	if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
+		// No allocation in the shared eden.
+		b(slow_case);
+		delayed()->nop();
+	}
+
+	get_thread(thread_reg);
+
+	lw(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
+	lw(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
+
+	// calculate amount of free space
+	sub(t1, t1, top);
+	shr(t1, LogHeapWordSize);
+
+	// Retain tlab and allocate object in shared space if
+	// the amount free in the tlab is too large to discard.
+	lw(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
+	slt(AT, t2, t1);
+	beq(AT, ZERO, discard_tlab);
+	delayed()->nop();
+
+	// Retain
+	
+	move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
+	add(t2, t2, AT);
+	sw(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
+	
+	if (TLABStats) {
+		// increment number of slow_allocations
+		lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
+		addiu(AT, AT, 1);
+		sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
+	}
+	b(try_eden);
+	delayed()->nop();
+
+  bind(discard_tlab);
+	if (TLABStats) {
+		// increment number of refills
+		lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
+		addi(AT, AT, 1);
+		sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
+		// accumulate wastage -- t1 is amount free in tlab
+		lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
+		add(AT, AT, t1);
+		sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
+	}
+
+	// if tlab is currently allocated (top or end != null) then
+	// fill [top, end + alignment_reserve) with array object
+	beq(top, ZERO, do_refill);
+	delayed()->nop();
+
+	// set up the mark word
+	move(AT, (int)markOopDesc::prototype()->copy_set_hash(0x2));
+	sw(AT, top, oopDesc::mark_offset_in_bytes());
+
+	// set the length to the remaining space
+	addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
+	addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
+	shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
+	sw(t1, top, arrayOopDesc::length_offset_in_bytes());
+
+	// set klass to intArrayKlass
+	lui(AT, split_high((int)Universe::intArrayKlassObj_addr()));
+	lw(t1, AT, split_low((int)Universe::intArrayKlassObj_addr())); 
+	sw(t1, top, oopDesc::klass_offset_in_bytes());
+
+	// refill the tlab with an eden allocation
+	bind(do_refill);
+	lw(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
+	shl(t1, LogHeapWordSize);
+	// add object_size ??
+	eden_allocate(top, t1, 0, t2, t3, slow_case);
+
+	// Check that t1 was preserved in eden_allocate.
+#ifdef ASSERT
+	if (UseTLAB) {
+		Label ok;
+		assert_different_registers(thread_reg, t1);
+		lw(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
+		shl(AT, LogHeapWordSize);
+		beq(AT, t1, ok);
+		delayed()->nop();
+		stop("assert(t1 == tlab size)");
+		should_not_reach_here();
+
+		bind(ok);
+	}
+#endif
+	sw(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
+	sw(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
+	add(top, top, t1);	
+	addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+	sw(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
+	verify_tlab(t1, t2);
+	b(retry);
+	delayed()->nop();
+}
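+// [Illustrative sketch, not generated code] The refill policy above is roughly:
+//
+//   free = (tlab_end - tlab_top) / HeapWordSize;
+//   if (free > refill_waste_limit) {      // too much left in the TLAB to throw away
+//     refill_waste_limit += increment;    // retain it and allocate this object in eden
+//     goto try_eden;
+//   }
+//   // otherwise discard: overwrite [top, end + alignment_reserve) with a dummy int[]
+//   // so the heap stays parsable, then eden_allocate a fresh TLAB and retry.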
+
+static const double     pi_4 =  0.7853981633974483;
+
+// the x86 version is too clumsy; I don't think we need that fuss. Maybe I'm wrong, FIXME
+// must get argument(a double) in F12/F13
+//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
+//We need to preserve the registers which may be modified during the call @Jerome
+void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
+//save all modified registers here
+//	if (preserve_cpu_regs) {
+//	}
+//FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP, RA, so we only need to save V0, V1, T9
+	pushad();
+//we should reserve stack space before we call
+	addi(SP, SP, -wordSize * 2);
+        switch (trig){
+		case 's' :
+              		call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
+			delayed()->nop();
+			break;
+		case 'c':	
+			call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
+			delayed()->nop();
+			break;
+		case 't':
+			call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
+			delayed()->nop();
+			break;
+		default: assert(false, "bad intrinsic");
+			break;
+	
+	}
+
+	addi(SP, SP, wordSize * 2);
+	popad();
+//	if (preserve_cpu_regs) {
+//	}
+}
+/*
+
+void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
+  ucomisd(dst, as_Address(src));
+}
+
+void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
+  ucomiss(dst, as_Address(src));
+}
+
+void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    xorpd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    xorpd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    xorps(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    xorps(dst, Address(rscratch1, 0));
+  }
+}
+*/
+
+void MacroAssembler::move(Register reg, int imm) {
+  if (is_simm16(imm)) {
+    addiu(reg, ZERO, imm);
+  } else {
+    lui(reg, split_high(imm));
+    if (split_low(imm))
+      addiu(reg, reg, split_low(imm));
+  }
+}
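+// [Illustrative note] For a 32-bit immediate that does not fit in 16 bits the
+// sequence is lui + addiu; since addiu sign-extends its 16-bit immediate,
+// split_high/split_low are assumed to be defined so that
+// (split_high(imm) << 16) + sign_extend(split_low(imm)) == imm.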
+
+// NOTE: unlike i486, we do not push eax here;
+// the x86 version saves eax because it uses eax as the jump register
+void MacroAssembler::verify_oop(Register reg, const char* s) {
+/*
+  if (!VerifyOops) return;
+
+  // Pass register number to verify_oop_subroutine
+  char* b = new char[strlen(s) + 50];
+  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
+  push(rax);                          // save rax,
+  push(reg);                          // pass register argument
+  ExternalAddress buffer((address) b);
+  // avoid using pushptr, as it modifies scratch registers
+  // and our contract is not to modify anything
+  movptr(rax, buffer.addr());
+  push(rax);
+  // call indirectly to solve generation ordering problem
+  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+  call(rax);
+*/
+	if (!VerifyOops) return;
+
+	// Pass register number to verify_oop_subroutine
+	char* b = new char[strlen(s) + 50];
+	sprintf(b, "verify_oop: %s: %s", reg->name(), s);
+	sw(T5, SP, - wordSize);
+	sw(T6, SP, - 2*wordSize);
+	sw(RA, SP, - 3*wordSize);
+	sw(A0, SP ,- 4*wordSize);	
+	sw(A1, SP ,- 5*wordSize);	
+	sw(AT, SP ,- 6*wordSize);	
+	sw(T9, SP ,- 7*wordSize);	
+	addiu(SP, SP, - 7 * wordSize);
+	move(A0, (int)b);
+	move(A1, reg);
+	// call indirectly to solve generation ordering problem
+	move(AT, (int)StubRoutines::verify_oop_subroutine_entry_address());        	
+	lw(T9, AT, 0);
+	jalr(T9);
+	delayed()->nop();
+	lw(T5, SP, 6* wordSize);
+	lw(T6, SP, 5* wordSize);
+	lw(RA, SP, 4* wordSize);
+	lw(A0, SP, 3* wordSize);
+	lw(A1, SP, 2* wordSize);
+	lw(AT, SP, 1* wordSize);
+	lw(T9, SP, 0* wordSize);
+	addiu(SP, SP, 7 * wordSize);
+}
+
+
+void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
+/*
+	if (!VerifyOops) return;
+
+  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
+  // Pass register number to verify_oop_subroutine
+  char* b = new char[strlen(s) + 50];
+  sprintf(b, "verify_oop_addr: %s", s);
+
+  push(rax);                          // save rax,
+  // addr may contain rsp so we will have to adjust it based on the push
+  // we just did
+  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
+  // stores rax into addr which is backwards of what was intended.
+  if (addr.uses(rsp)) {
+    lea(rax, addr);
+    pushptr(Address(rax, BytesPerWord));
+  } else {
+    pushptr(addr);
+  }
+
+  ExternalAddress buffer((address) b);
+  // pass msg argument
+  // avoid using pushptr, as it modifies scratch registers
+  // and our contract is not to modify anything
+  movptr(rax, buffer.addr());
+  push(rax);
+
+  // call indirectly to solve generation ordering problem
+  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
+  call(rax);
+  // Caller pops the arguments and restores rax, from the stack
+*/
+	if (!VerifyOops) {
+		nop();
+		return;
+	}
+	// Pass register number to verify_oop_subroutine
+	Address adjust(addr.base(),addr.disp()+BytesPerWord);	
+	char* b = new char[strlen(s) + 50];
+	sprintf(b, "verify_oop_addr: %s",  s);
+
+	sw(T5, SP, - wordSize);
+	sw(T6, SP, - 2*wordSize);
+	sw(RA, SP, - 3*wordSize);
+	sw(A0, SP, - 4*wordSize);	
+	sw(A1, SP, - 5*wordSize);	
+	sw(AT, SP, - 6*wordSize);	
+	sw(T9, SP, - 7*wordSize);	
+	addiu(SP, SP, - 7 * wordSize);
+
+	move(A0, (int)b);
+	lw(A1, adjust);
+	// call indirectly to solve generation ordering problem
+	move(AT, (int)StubRoutines::verify_oop_subroutine_entry_address());        	
+	lw(T9, AT, 0);
+	jalr(T9);
+	delayed()->nop();
+	lw(T5, SP, 6* wordSize);
+	lw(T6, SP, 5* wordSize);
+	lw(RA, SP, 4* wordSize);
+	lw(A0, SP, 3* wordSize);
+	lw(A1, SP, 2* wordSize);
+	lw(AT, SP, 1* wordSize);
+	lw(T9, SP, 0* wordSize);
+	addiu(SP, SP, 7 * wordSize);
+}
+
+// used registers :  T5, T6
+void MacroAssembler::verify_oop_subroutine() {
+	// [sp - 1]: ra
+	// [sp + 0]: char* error message    A0
+	// [sp + 1]: oop   object to verify A1
+
+	Label exit, error, error1,error2,error3,error4;
+	// increment counter
+	move(T5, (int)StubRoutines::verify_oop_count_addr());
+	lw(AT, T5, 0);
+	addi(AT, AT, 1);
+	sw(AT, T5, 0);
+
+	// make sure object is 'reasonable'
+	beq(A1, ZERO, exit);         // if obj is NULL it is ok
+	delayed()->nop();
+
+	// Check if the oop is in the right area of memory
+	const int oop_mask = Universe::verify_oop_mask();
+	const int oop_bits = Universe::verify_oop_bits();
+	move(AT, oop_mask);
+	andr(T5, A1, AT);
+	move(AT, oop_bits);
+	/*
+	//jerome_for_debug
+	bne(T5, AT, error);
+	delayed()->nop();
+	 */
+	// make sure klass is 'reasonable'
+	lw(T5, A1, oopDesc::klass_offset_in_bytes()); // get klass
+	/*
+	//jerome_for_debug
+	beq(T5, ZERO, error1);                        // if klass is NULL it is broken
+	delayed()->nop();
+	 */
+	// Check if the klass is in the right area of memory
+	const int klass_mask = Universe::verify_klass_mask();
+	const int klass_bits = Universe::verify_klass_bits();
+
+	move(AT, klass_mask);
+	andr(T6, T5, AT);
+	move(AT, klass_bits);
+	bne(T6, AT, error2);
+	delayed()->nop();
+
+	// make sure klass' klass is 'reasonable'
+	lw(T5, T5, oopDesc::klass_offset_in_bytes()); // get klass' klass
+	beq(T5, ZERO, error3);  // if klass' klass is NULL it is broken
+	delayed()->nop();
+
+	move(AT, klass_mask);
+	andr(T6, T5, AT);
+	move(AT, klass_bits);
+	bne(T6, AT, error4);
+	delayed()->nop();     // if klass not in right area of memory it is broken too.
+
+	// return if everything seems ok
+	bind(exit);
+
+	jr(RA);
+	delayed()->nop();
+
+	// handle errors
+	bind(error);
+	lw(AT, ZERO, 16);	
+	sw(RA, SP, (-1) * wordSize);
+	sw(FP, SP, (-2) * wordSize);
+	//save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2) + 2);
+	//RegistersForDebugging::save_registers(this);
+	//move(A1, SP);
+	//addi(SP, SP, (-2) * wordSize);
+	pushad();
+	addi(SP, SP, (-3) * wordSize);
+	call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
+	delayed()->nop();
+	//addi(SP, SP, 2 * wordSize);
+	addiu(SP, SP, 3 * wordSize);
+	popad();	
+	//RegistersForDebugging::restore_registers(this, SP);
+	//restore();
+	lw(RA, SP, (-1) * wordSize);
+	lw(FP, SP, (-2) * wordSize);
+	jr(RA);
+	delayed()->nop();
+	//jerome_for_debug
+	bind(error1);
+	stop("error1");
+	bind(error2);
+	stop("error2");
+	bind(error3);
+	stop("error3");
+	bind(error4);
+	stop("error4");
+}
+
+void MacroAssembler::verify_tlab(Register t1, Register t2) {
+#ifdef ASSERT
+	assert_different_registers(t1, t2, AT);
+  if (UseTLAB && VerifyOops) {
+		Label next, ok;
+
+		get_thread(t1);
+
+		lw(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
+		lw(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
+		sltu(AT, t2, AT);
+		beq(AT, ZERO, next);
+		delayed()->nop();
+
+		stop("assert(top >= start)");
+
+		bind(next);
+		lw(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
+		sltu(AT, AT, t2);	
+		beq(AT, ZERO, ok);
+		delayed()->nop();
+
+		stop("assert(top <= end)");
+
+		bind(ok);
+
+		/*
+    Label next, ok;
+    Register t1 = rsi;
+    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);
+
+    push(t1);
+    NOT_LP64(push(thread_reg));
+    NOT_LP64(get_thread(thread_reg));
+
+    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+    jcc(Assembler::aboveEqual, next);
+    stop("assert(top >= start)");
+    should_not_reach_here();
+
+    bind(next);
+    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
+    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
+    jcc(Assembler::aboveEqual, ok);
+    stop("assert(top <= end)");
+    should_not_reach_here();
+
+    bind(ok);
+    NOT_LP64(pop(thread_reg));
+    pop(t1);
+		*/
+  }
+#endif
+}
+
+void MacroAssembler::hswap(Register reg) {
+	//andi(reg, reg, 0xffff);
+	srl(AT, reg, 8);
+	sll(reg, reg, 24);
+	sra(reg, reg, 16);
+	orr(reg, reg, AT);
+}
+
+void MacroAssembler::huswap(Register reg) {
+	//andi(reg, reg, 0xffff);
+	srl(AT, reg, 8);
+	sll(reg, reg, 24);
+	srl(reg, reg, 16);
+	orr(reg, reg, AT);
+}
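+// [Illustrative note] hswap/huswap byte-swap a 16-bit value held in a 32-bit
+// register (assuming the upper half is zero, cf. the commented-out andi):
+// hswap sign-extends the result, huswap zero-extends it.
+// E.g. reg = 0x000012EF gives 0xFFFFEF12 (hswap) or 0x0000EF12 (huswap).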
+
+// a byte-swap trick that needs only one extra register (AT)
+// by yjl 6/29/2005
+void MacroAssembler::swap(Register reg) {
+	srl(AT, reg, 8);
+	sll(reg, reg, 24);
+	orr(reg, reg, AT);
+	//reg : 4 1 2 3
+	srl(AT, AT, 16);
+	xorr(AT, AT, reg);
+	andi(AT, AT, 0xff);
+	//AT : 0 0 0 1^3);
+	xorr(reg, reg, AT);
+	//reg : 4 1 2 1
+	sll(AT, AT, 16);
+	xorr(reg, reg, AT);
+	//reg : 4 3 2 1
+}
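+// [Illustrative note] swap reverses the four bytes of a 32-bit word using only
+// AT as scratch; with the bytes labelled (1 2 3 4) from MSB to LSB as in the
+// comments above, the result is (4 3 2 1), e.g. 0x11223344 becomes 0x44332211.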
+
+void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
+	Label done, again, nequal;
+
+	bind(again);
+	ll(AT, dest);
+	bne(AT, c_reg, nequal);
+	delayed()->nop(); 
+
+	move(AT, x_reg);
+	sc(AT, dest);
+	beq(AT, ZERO, again);
+	delayed()->nop();
+	b(done);
+	delayed()->nop();
+
+	// not xchged
+	bind(nequal);
+	move(c_reg, AT);
+	move(AT, ZERO);
+
+	bind(done);
+}
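+// [Illustrative sketch, not generated code] cmpxchg is a compare-and-swap built
+// from LL/SC; its observable behaviour is roughly:
+//
+//   atomically {
+//     if (*dest == c_reg) { *dest = x_reg; AT = 1; }    // success
+//     else                { c_reg = *dest; AT = 0; }    // failure: return the current value
+//   }
+//
+// The beq on the sc result restarts the LL/SC pair if the reservation was lost.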
+
+void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
+	Label done, again, nequal;
+
+	Register x_reg = x_regLo;
+	dsll32(x_regHi, x_regHi, 0);
+	dsll32(x_regLo, x_regLo, 0);
+	dsrl32(x_regLo, x_regLo, 0);
+	orr(x_reg, x_regLo, x_regHi);
+
+	Register c_reg = c_regLo;
+	dsll32(c_regHi, c_regHi, 0);
+	dsll32(c_regLo, c_regLo, 0);
+	dsrl32(c_regLo, c_regLo, 0);
+	orr(c_reg, c_regLo, c_regHi);
+
+	bind(again);
+	lld(AT, dest);
+	bne(AT, c_reg, nequal);
+	delayed()->nop(); 
+
+	//move(AT, x_reg);
+	dadd(AT, x_reg, ZERO);
+	scd(AT, dest);
+	beq(AT, ZERO, again);
+	delayed()->nop();
+	b(done);
+	delayed()->nop();
+
+	// not xchged
+	bind(nequal);
+	//move(c_reg, AT);
+	//move(AT, ZERO);
+	dadd(c_reg, AT, ZERO);
+	dadd(AT, ZERO, ZERO);
+	bind(done);
+}
+
+// the three registers must be different
+void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {    
+  assert_different_registers(fd, fs, ft); 
+	div_s(fd, fs, ft); 
+	trunc_l_s(fd, fd); 
+	cvt_s_l(fd, fd); 
+	mul_s(fd, fd, ft); 
+	sub_s(fd, fs, fd); 
+}
+
+// the three registers must be different
+void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) {    
+	assert_different_registers(fd, fs, ft); 
+
+	div_d(fd, fs, ft); 
+	trunc_l_d(fd, fd); 
+	cvt_d_l(fd, fd); 
+	mul_d(fd, fd, ft); 
+	sub_d(fd, fs, fd); 
+}
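+// [Illustrative note] rem_s/rem_d compute a floating-point remainder by
+// truncating the quotient toward zero:  fd = fs - trunc(fs / ft) * ft,
+// i.e. the fmod() convention where the result takes the sign of fs
+// (ignoring overflow of the intermediate 64-bit integer conversion).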
+
+class ControlWord {
+				public:
+								int32_t _value;
+
+  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
+  int  precision_control() const       { return  (_value >>  8) & 3      ; }
+  bool precision() const               { return ((_value >>  5) & 1) != 0; }
+  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
+  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
+  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
+  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
+  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
+
+  void print() const {
+    // rounding control
+    const char* rc;
+    switch (rounding_control()) {
+      case 0: rc = "round near"; break;
+      case 1: rc = "round down"; break;
+      case 2: rc = "round up  "; break;
+      case 3: rc = "chop      "; break;
+    };
+    // precision control
+    const char* pc;
+    switch (precision_control()) {
+      case 0: pc = "24 bits "; break;
+      case 1: pc = "reserved"; break;
+      case 2: pc = "53 bits "; break;
+      case 3: pc = "64 bits "; break;
+    };
+    // flags
+    char f[9];
+    f[0] = ' ';
+    f[1] = ' ';
+    f[2] = (precision   ()) ? 'P' : 'p';
+    f[3] = (underflow   ()) ? 'U' : 'u';
+    f[4] = (overflow    ()) ? 'O' : 'o';
+    f[5] = (zero_divide ()) ? 'Z' : 'z';
+    f[6] = (denormalized()) ? 'D' : 'd';
+    f[7] = (invalid     ()) ? 'I' : 'i';
+    f[8] = '\x0';
+    // output
+    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
+  }
+
+};
+
+class StatusWord {
+ public:
+  int32_t _value;
+
+  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
+  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
+  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
+  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
+  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
+  int  top() const                     { return  (_value >> 11) & 7      ; }
+  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
+  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
+  bool precision() const               { return ((_value >>  5) & 1) != 0; }
+  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
+  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
+  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
+  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
+  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }
+
+  void print() const {
+    // condition codes
+    char c[5];
+    c[0] = (C3()) ? '3' : '-';
+    c[1] = (C2()) ? '2' : '-';
+    c[2] = (C1()) ? '1' : '-';
+    c[3] = (C0()) ? '0' : '-';
+    c[4] = '\x0';
+    // flags
+    char f[9];
+    f[0] = (error_status()) ? 'E' : '-';
+    f[1] = (stack_fault ()) ? 'S' : '-';
+    f[2] = (precision   ()) ? 'P' : '-';
+    f[3] = (underflow   ()) ? 'U' : '-';
+    f[4] = (overflow    ()) ? 'O' : '-';
+    f[5] = (zero_divide ()) ? 'Z' : '-';
+    f[6] = (denormalized()) ? 'D' : '-';
+    f[7] = (invalid     ()) ? 'I' : '-';
+    f[8] = '\x0';
+    // output
+    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
+  }
+
+};
+
+class TagWord {
+ public:
+  int32_t _value;
+
+  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }
+
+  void print() const {
+    printf("%04x", _value & 0xFFFF);
+  }
+
+};
+
+class FPU_Register {
+ public:
+  int32_t _m0;
+  int32_t _m1;
+  int16_t _ex;
+
+  bool is_indefinite() const           {
+    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
+  }
+
+  void print() const {
+    char  sign = (_ex < 0) ? '-' : '+';
+    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
+    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
+  };
+
+};
+
+class FPU_State {
+ public:
+  enum {
+    register_size       = 10,
+    number_of_registers =  8,
+    register_mask       =  7
+  };
+
+  ControlWord  _control_word;
+  StatusWord   _status_word;
+  TagWord      _tag_word;
+  int32_t      _error_offset;
+  int32_t      _error_selector;
+  int32_t      _data_offset;
+  int32_t      _data_selector;
+  int8_t       _register[register_size * number_of_registers];
+
+  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
+  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }
+
+  const char* tag_as_string(int tag) const {
+    switch (tag) {
+      case 0: return "valid";
+      case 1: return "zero";
+      case 2: return "special";
+      case 3: return "empty";
+    }
+    ShouldNotReachHere();
+    return NULL;
+  }
+
+  void print() const {
+    // print computation registers
+    { int t = _status_word.top();
+      for (int i = 0; i < number_of_registers; i++) {
+        int j = (i - t) & register_mask;
+        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
+        st(j)->print();
+        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
+      }
+    }
+    printf("\n");
+    // print control registers
+    printf("ctrl = "); _control_word.print(); printf("\n");
+    printf("stat = "); _status_word .print(); printf("\n");
+    printf("tags = "); _tag_word    .print(); printf("\n");
+  }
+
+};
+
+class Flag_Register {
+ public:
+  int32_t _value;
+
+  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
+  bool direction() const               { return ((_value >> 10) & 1) != 0; }
+  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
+  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
+  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
+  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
+  bool carry() const                   { return ((_value >>  0) & 1) != 0; }
+
+  void print() const {
+    // flags
+    char f[8];
+    f[0] = (overflow       ()) ? 'O' : '-';
+    f[1] = (direction      ()) ? 'D' : '-';
+    f[2] = (sign           ()) ? 'S' : '-';
+    f[3] = (zero           ()) ? 'Z' : '-';
+    f[4] = (auxiliary_carry()) ? 'A' : '-';
+    f[5] = (parity         ()) ? 'P' : '-';
+    f[6] = (carry          ()) ? 'C' : '-';
+    f[7] = '\x0';
+    // output
+    printf("%08x  flags = %s", _value, f);
+  }
+
+};
+
+class IU_Register {
+ public:
+  int32_t _value;
+
+  void print() const {
+    printf("%08x  %11d", _value, _value);
+  }
+
+};
+
+class IU_State {
+ public:
+  Flag_Register _eflags;
+  IU_Register   _rdi;
+  IU_Register   _rsi;
+  IU_Register   _rbp;
+  IU_Register   _rsp;
+  IU_Register   _rbx;
+  IU_Register   _rdx;
+  IU_Register   _rcx;
+  IU_Register   _rax;
+
+  void print() const {
+    // computation registers
+    printf("rax,  = "); _rax.print(); printf("\n");
+    printf("rbx,  = "); _rbx.print(); printf("\n");
+    printf("rcx  = "); _rcx.print(); printf("\n");
+    printf("rdx  = "); _rdx.print(); printf("\n");
+    printf("rdi  = "); _rdi.print(); printf("\n");
+    printf("rsi  = "); _rsi.print(); printf("\n");
+    printf("rbp,  = "); _rbp.print(); printf("\n");
+    printf("rsp  = "); _rsp.print(); printf("\n");
+    printf("\n");
+    // control registers
+    printf("flgs = "); _eflags.print(); printf("\n");
+  }
+};
+
+
+class CPU_State {
+ public:
+  FPU_State _fpu_state;
+  IU_State  _iu_state;
+
+  void print() const {
+    printf("--------------------------------------------------\n");
+    _iu_state .print();
+    printf("\n");
+    _fpu_state.print();
+    printf("--------------------------------------------------\n");
+  }
+
+};
+
+
+static void _print_CPU_state(CPU_State* state) {
+  state->print();
+};
+
+/*
+void MacroAssembler::print_CPU_state() {
+  push_CPU_state();
+  push(rsp);                // pass CPU state
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
+  addptr(rsp, wordSize);       // discard argument
+  pop_CPU_state();
+}
+*/
+
+void MacroAssembler::align(int modulus) {
+	while (offset() % modulus != 0) nop();
+}
+
+static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
+  static int counter = 0;
+  FPU_State* fs = &state->_fpu_state;
+  counter++;
+  // For leaf calls, only verify that the top few elements remain empty.
+  // We only need 1 empty at the top for C2 code.
+  if( stack_depth < 0 ) {
+    if( fs->tag_for_st(7) != 3 ) {
+      printf("FPR7 not empty\n");
+      state->print();
+      assert(false, "error");
+      return false;
+    }
+    return true;                // All other stack states do not matter
+  }
+
+  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
+         "bad FPU control word");
+
+  // compute stack depth
+  int i = 0;
+  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
+  int d = i;
+  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
+  // verify findings
+  if (i != FPU_State::number_of_registers) {
+    // stack not contiguous
+    printf("%s: stack not contiguous at ST%d\n", s, i);
+    state->print();
+    assert(false, "error");
+    return false;
+  }
+  // check if computed stack depth corresponds to expected stack depth
+  if (stack_depth < 0) {
+    // expected stack depth is -stack_depth or less
+    if (d > -stack_depth) {
+      // too many elements on the stack
+      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
+      state->print();
+      assert(false, "error");
+      return false;
+    }
+  } else {
+    // expected stack depth is stack_depth
+    if (d != stack_depth) {
+      // wrong stack depth
+      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
+      state->print();
+      assert(false, "error");
+      return false;
+    }
+  }
+  // everything is cool
+  return true;
+}
+
+
+void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
+	//FIXME aoqi
+	// %%%%% need to implement this
+	//Unimplemented();
+	/*
+	if (!VerifyFPU) return;
+  push_CPU_state();
+  push(rsp);                // pass CPU state
+  ExternalAddress msg((address) s);
+  // pass message string s
+  pushptr(msg.addr());
+  push(stack_depth);        // pass stack depth
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
+  addptr(rsp, 3 * wordSize);   // discard arguments
+  // check for error
+  { Label L;
+    testl(rax, rax);
+    jcc(Assembler::notZero, L);
+    int3();                  // break if error condition
+    bind(L);
+  }
+  pop_CPU_state();
+	*/
+}
+//We preserve all caller-saved registers
+void  MacroAssembler::pushad(){
+
+	push(AT);
+	push(A0);
+	push(A1);
+	push(A2);
+	push(A3);
+	push(V0);
+	push(V1);
+	push(T0);
+	push(T1);
+	push(T2);
+	push(T3);
+	push(T4);
+	push(T5);
+	push(T6);
+	push(T7);
+	push(T8); 
+	push(T9); 
+	push(GP); 
+	push(RA); 
+	push(FP); 
+	
+};
+
+void  MacroAssembler::popad(){
+	pop(FP);
+	pop(RA);
+	pop(GP);
+	pop(T9);
+	pop(T8);
+	pop(T7);
+	pop(T6);
+	pop(T5);
+	pop(T4);
+	pop(T3);
+	pop(T2);
+	pop(T1);
+	pop(T0);
+	pop(V1);
+	pop(V0);
+	pop(A3);
+	pop(A2);
+	pop(A1);
+	pop(A0); 
+	pop(AT); 
+};
+
+void MacroAssembler::push2(Register reg1, Register reg2) {
+  addi(SP, SP, -8);
+	sw(reg2, SP, 0);
+	sw(reg1, SP, 4);
+}   
+
+void MacroAssembler::pop2(Register reg1, Register reg2) {
+	lw(reg1, SP, 0);
+	lw(reg2, SP, 4);
+	addi(SP, SP, 8);
+}
+
+void MacroAssembler::load_two_bytes_from_at_bcp(Register reg, Register tmp, int offset)
+{
+	
+	if(offset & 1){
+		lbu(reg, BCP, offset+1); 
+		lbu(tmp, BCP, offset);
+		sll(reg, reg, 8);
+		addu(reg, tmp, reg);   
+	}
+	else
+		lhu(reg, BCP, offset);
+}
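+// [Illustrative note] For an odd (unaligned) offset the halfword is assembled
+// from two byte loads:  reg = bcp[offset] | (bcp[offset + 1] << 8),
+// the same value an aligned lhu yields on a little-endian MIPS target.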
+
+void MacroAssembler::store_two_byts_to_at_bcp(Register reg, Register tmp, int offset)
+{	
+	if(offset & 1){
+		
+		sb(reg, BCP, offset);
+		srl(reg, reg, 8);
+		sb(reg, BCP, offset + 1);   
+	}
+	else
+		sh(reg, BCP, offset);
+}
+
+/*
+void MacroAssembler::load_klass(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    decode_heap_oop_not_null(dst);
+  } else
+#endif
+    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+}
+
+void MacroAssembler::load_prototype_header(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  } else
+#endif
+    {
+      movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+      movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    }
+}
+
+void MacroAssembler::store_klass(Register dst, Register src) {
+#ifdef _LP64
+  if (UseCompressedOops) {
+    encode_heap_oop_not_null(src);
+    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+  } else
+#endif
+    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+}
+
+#ifdef _LP64
+void MacroAssembler::store_klass_gap(Register dst, Register src) {
+  if (UseCompressedOops) {
+    // Store to klass gap in destination
+    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
+  }
+}
+
+void MacroAssembler::load_heap_oop(Register dst, Address src) {
+  if (UseCompressedOops) {
+    movl(dst, src);
+    decode_heap_oop(dst);
+  } else {
+    movq(dst, src);
+  }
+}
+
+void MacroAssembler::store_heap_oop(Address dst, Register src) {
+  if (UseCompressedOops) {
+    assert(!dst.uses(src), "not enough registers");
+    encode_heap_oop(src);
+    movl(dst, src);
+  } else {
+    movq(dst, src);
+  }
+}
+
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+void MacroAssembler::encode_heap_oop(Register r) {
+  assert (UseCompressedOops, "should be compressed");
+#ifdef ASSERT
+  if (CheckCompressedOops) {
+    Label ok;
+    push(rscratch1); // cmpptr trashes rscratch1
+    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
+    jcc(Assembler::equal, ok);
+    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
+    bind(ok);
+    pop(rscratch1);
+  }
+#endif
+  verify_oop(r, "broken oop in encode_heap_oop");
+  testq(r, r);
+  cmovq(Assembler::equal, r, r12_heapbase);
+  subq(r, r12_heapbase);
+  shrq(r, LogMinObjAlignmentInBytes);
+}
+
+void MacroAssembler::encode_heap_oop_not_null(Register r) {
+  assert (UseCompressedOops, "should be compressed");
+#ifdef ASSERT
+  if (CheckCompressedOops) {
+    Label ok;
+    testq(r, r);
+    jcc(Assembler::notEqual, ok);
+    stop("null oop passed to encode_heap_oop_not_null");
+    bind(ok);
+  }
+#endif
+  verify_oop(r, "broken oop in encode_heap_oop_not_null");
+  subq(r, r12_heapbase);
+  shrq(r, LogMinObjAlignmentInBytes);
+}
+
+void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+  assert (UseCompressedOops, "should be compressed");
+#ifdef ASSERT
+  if (CheckCompressedOops) {
+    Label ok;
+    testq(src, src);
+    jcc(Assembler::notEqual, ok);
+    stop("null oop passed to encode_heap_oop_not_null2");
+    bind(ok);
+  }
+#endif
+  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
+  if (dst != src) {
+    movq(dst, src);
+  }
+  subq(dst, r12_heapbase);
+  shrq(dst, LogMinObjAlignmentInBytes);
+}
+
+void  MacroAssembler::decode_heap_oop(Register r) {
+  assert (UseCompressedOops, "should be compressed");
+#ifdef ASSERT
+  if (CheckCompressedOops) {
+    Label ok;
+    push(rscratch1);
+    cmpptr(r12_heapbase,
+           ExternalAddress((address)Universe::heap_base_addr()));
+    jcc(Assembler::equal, ok);
+    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
+    bind(ok);
+    pop(rscratch1);
+  }
+#endif
+
+  Label done;
+  shlq(r, LogMinObjAlignmentInBytes);
+  jccb(Assembler::equal, done);
+  addq(r, r12_heapbase);
+#if 0
+   // alternate decoding probably a wash.
+   testq(r, r);
+   jccb(Assembler::equal, done);
+   leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+#endif
+  bind(done);
+  verify_oop(r, "broken oop in decode_heap_oop");
+}
+
+void  MacroAssembler::decode_heap_oop_not_null(Register r) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
+  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+}
+
+void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+  assert (UseCompressedOops, "should only be used for compressed headers");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
+  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+}
+
+void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
+}
+
+void MacroAssembler::reinit_heapbase() {
+  if (UseCompressedOops) {
+    movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
+  }
+}
+#endif // _LP64
+*/
+SkipIfEqual::SkipIfEqual(
+    MacroAssembler* masm, const bool* flag_addr, bool value) {
+  _masm = masm;
+  _masm->move(AT, (int32_t)flag_addr);
+  _masm->lb(AT,AT,0);
+  _masm->addi(AT,AT,-value);
+  _masm->beq(AT,ZERO,_label);
+  _masm->delayed()->nop();
+}
+
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
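+// [Illustrative note] SkipIfEqual brackets a region that is branched over at run
+// time when *flag_addr == value; a typical (hypothetical) use:
+//
+//   {
+//     SkipIfEqual skip(masm, &SomeBoolFlag, false);
+//     ... code here executes only when SomeBoolFlag is true ...
+//   }  // the destructor binds the label the constructor branched to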
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/assembler_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,1548 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class BiasedLockingCounters;
+
+
+// Note: A register location is represented via a Register, not
+//       via an address for efficiency & simplicity reasons.
+
+class ArrayAddress;
+
+class Address VALUE_OBJ_CLASS_SPEC {
+  
+public:
+  enum ScaleFactor {
+    no_scale = -1,
+    times_1  =  0,
+    times_2  =  1,
+    times_4  =  2,
+    times_8  =  3,
+    times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
+  };
+
+ private:
+  Register         _base;
+  Register         _index;
+  ScaleFactor      _scale;
+  int              _disp;
+  RelocationHolder _rspec;
+
+  // Easily misused constructors make them private
+  // %%% can we make these go away?
+  NOT_LP64(Address(address loc, RelocationHolder spec);)
+  Address(int disp, address loc, relocInfo::relocType rtype);
+  Address(int disp, address loc, RelocationHolder spec);
+
+ public:
+
+ int disp() { return _disp; }
+  // creation
+  Address()
+    : _base(noreg),
+      _index(noreg),
+      _scale(no_scale),
+      _disp(0) {
+  }
+
+  // No default displacement otherwise Register can be implicitly
+  // converted to 0(Register) which is quite a different animal.
+
+  Address(Register base, int disp)
+    : _base(base),
+      _index(noreg),
+      _scale(no_scale),
+      _disp(disp) {
+  }
+
+  Address(Register base) 
+   : _base(base),
+     _index(noreg),
+     _scale(no_scale),
+     _disp(0) {
+  }
+
+  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
+    : _base (base),
+      _index(index),
+      _scale(scale),
+      _disp (disp) {
+    assert(!index->is_valid() == (scale == Address::no_scale),
+           "inconsistent address");
+  }
+
+  // The following two overloads are used in connection with the
+  // ByteSize type (see sizes.hpp).  They simplify the use of
+  // ByteSize'd arguments in assembly code. Note that their equivalent
+  // for the optimized build are the member functions with int disp
+  // argument since ByteSize is mapped to an int type in that case.
+  //
+  // Note: DO NOT introduce similar overloaded functions for WordSize
+  // arguments as in the optimized mode, both ByteSize and WordSize
+  // are mapped to the same type and thus the compiler cannot make a
+  // distinction anymore (=> compiler errors).
+
+#ifdef ASSERT
+  Address(Register base, ByteSize disp)
+    : _base(base),
+      _index(noreg),
+      _scale(no_scale),
+      _disp(in_bytes(disp)) {
+  }
+
+  Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
+    : _base(base),
+      _index(index),
+      _scale(scale),
+      _disp(in_bytes(disp)) {
+    assert(!index->is_valid() == (scale == Address::no_scale),
+           "inconsistent address");
+  }
+#endif // ASSERT
+
+  // accessors
+  bool        uses(Register reg) const { return _base == reg || _index == reg; }
+  Register    base()             const { return _base;  }
+  Register    index()            const { return _index; }
+  ScaleFactor scale()            const { return _scale; }
+  int         disp()             const { return _disp;  }
+
+  // Convert the raw encoding form into the form expected by the constructor for
+  // Address.  An index of 4 (rsp) corresponds to having no index, so convert
+  // that to noreg for the Address constructor.
+  //static Address make_raw(int base, int index, int scale, int disp);
+
+  static Address make_array(ArrayAddress);
+
+/*
+ private:
+  bool base_needs_rex() const {
+    return _base != noreg && _base->encoding() >= 8;
+  }
+
+  bool index_needs_rex() const {
+    return _index != noreg &&_index->encoding() >= 8;
+  }
+
+  relocInfo::relocType reloc() const { return _rspec.type(); }
+*/
+  friend class Assembler;
+  friend class MacroAssembler;
+  friend class LIR_Assembler; // base/index/scale/disp
+};
+
+
+// Calling convention
+class Argument VALUE_OBJ_CLASS_SPEC {
+ private:
+	int _number;
+ public:
+	enum {
+		n_register_parameters = 4,   // 4 integer registers used to pass parameters
+		n_float_register_parameters = 4   // 4 float registers used to pass parameters
+	};
+	
+	Argument(int number) : _number(number) {}
+	Argument successor() { return Argument(number() + 1); }
+
+	int number() const { return _number; }
+	bool is_Register() const { return _number < n_register_parameters; }
+	bool is_FloatRegister() const { return _number < n_float_register_parameters; }
+
+	Register as_Register() const {
+		assert(is_Register(), "must be a register argument");
+		return ::as_Register(A0->encoding() + _number);
+	}
+	FloatRegister as_FloatRegister() const {
+		assert(is_FloatRegister(), "must be a float register argument");
+		return ::as_FloatRegister(F12->encoding() + _number);
+	}
+
+	Address as_caller_address() const { return Address(SP, number() * wordSize); }
+};
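+// Illustrative sketch (assumed usage, not from the original port; total_args is
+// a hypothetical count): walking the integer argument slots of a native call.
+// Argument(0).as_Register() is A0, Argument(1) the next register, and so on;
+// once number() reaches n_register_parameters the argument lives on the
+// caller's stack and is reached through as_caller_address().
+//
+//   for (Argument arg(0); arg.number() < total_args; arg = arg.successor()) {
+//     if (arg.is_Register()) { /* pass the value in arg.as_Register() */ }
+//     else                   { /* store it to arg.as_caller_address() */ }
+//   }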
+
+
+
+//
+// AddressLiteral has been split out from Address because operands of this type
+// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
+// the few instructions that need to deal with address literals are unique and the
+// MacroAssembler does not have to implement every instruction in the Assembler
+// in order to search for address literals that may need special handling depending
+// on the instruction and the platform. As a small step on the way to merging the
+// i486/amd64 directories.
+//
+class AddressLiteral VALUE_OBJ_CLASS_SPEC {
+  friend class ArrayAddress;
+  RelocationHolder _rspec;
+  // Typically when we use AddressLiterals we want their rval.
+  // However, in some situations we want the lval (effective address) of the item.
+  // We provide a special factory for making those lvals.
+  bool _is_lval;
+
+  // If the target is far we'll need to load the effective address of this into
+  // a register to reach it. Otherwise if near we can do PC-relative addressing.
+
+  address          _target;
+
+ protected:
+  // creation
+  AddressLiteral()
+    : _is_lval(false),
+      _target(NULL)
+  {}
+
+  public:
+
+
+  AddressLiteral(address target, relocInfo::relocType rtype);
+
+  AddressLiteral(address target, RelocationHolder const& rspec)
+    : _rspec(rspec),
+      _is_lval(false),
+      _target(target)
+  {}
+
+  AddressLiteral addr() {
+    AddressLiteral ret = *this;
+    ret._is_lval = true;
+    return ret;
+  }
+
+
+ private:
+
+  address target() { return _target; }
+  bool is_lval() { return _is_lval; }
+
+  relocInfo::relocType reloc() const { return _rspec.type(); }
+  const RelocationHolder& rspec() const { return _rspec; }
+
+  friend class Assembler;
+  friend class MacroAssembler;
+  friend class Address;
+  friend class LIR_Assembler;
+};
+
+// Convenience classes
+class RuntimeAddress: public AddressLiteral {
+
+  public:
+
+  RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
+
+};
+
+class OopAddress: public AddressLiteral {
+
+  public:
+
+  OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){}
+
+};
+
+class ExternalAddress: public AddressLiteral {
+
+  public:
+
+  ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){}
+
+};
+
+class InternalAddress: public AddressLiteral {
+
+  public:
+
+  InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
+
+};
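+// Illustrative note (not from the original port; some_global and some_stub_entry
+// are hypothetical names): these wrappers differ only in the relocation type they
+// record, e.g. ExternalAddress((address)&some_global) carries
+// relocInfo::external_word_type while RuntimeAddress(some_stub_entry) carries
+// relocInfo::runtime_call_type, so later relocation passes can patch the
+// embedded address correctly.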
+
+// x86 can do array addressing as a single operation since disp can be an absolute
+// address; amd64 can't. We create a class that expresses the concept but does extra
+// magic on amd64 to get the final result.
+
+class ArrayAddress VALUE_OBJ_CLASS_SPEC {
+  private:
+
+  AddressLiteral _base;
+  Address        _index;
+
+  public:
+
+  ArrayAddress() {};
+  ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
+  AddressLiteral base() { return _base; }
+  Address index() { return _index; }
+
+};
+
+const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
+
+// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction
+// level; i.e., what you write is what you get. The Assembler generates code into
+// a CodeBuffer.
+
+class Assembler : public AbstractAssembler  {
+  friend class AbstractAssembler; // for the non-virtual hack
+  friend class LIR_Assembler; // as_Address()
+  friend class StubGenerator;
+
+  public:
+  enum ops {
+	  special_op  = 0x00,
+	  regimm_op   = 0x01,
+	  j_op        = 0x02,
+	  jal_op      = 0x03,
+	  beq_op      = 0x04,
+	  bne_op      = 0x05,
+	  blez_op     = 0x06,
+	  bgtz_op     = 0x07,
+	  addi_op     = 0x08,
+	  addiu_op    = 0x09,
+	  slti_op     = 0x0a,
+	  sltiu_op    = 0x0b,
+	  andi_op     = 0x0c,
+	  ori_op      = 0x0d,
+	  xori_op     = 0x0e,
+	  lui_op      = 0x0f,
+	  cop0_op     = 0x10,
+	  cop1_op     = 0x11,
+	  cop2_op     = 0x12,
+	  cop3_op     = 0x13,
+	  beql_op     = 0x14,
+	  bnel_op     = 0x15,
+	  blezl_op    = 0x16,
+	  bgtzl_op    = 0x17,
+	  daddi_op    = 0x18,
+	  daddiu_op   = 0x19,
+	  ldl_op      = 0x1a,
+	  ldr_op      = 0x1b,
+	  lb_op       = 0x20,
+	  lh_op       = 0x21,
+	  lwl_op      = 0x22,
+	  lw_op       = 0x23,
+	  lbu_op      = 0x24,
+	  lhu_op      = 0x25,
+	  lwr_op      = 0x26,
+	  lwu_op      = 0x27,
+	  sb_op       = 0x28,
+	  sh_op       = 0x29,
+	  swl_op      = 0x2a,
+	  sw_op       = 0x2b,
+	  sdl_op      = 0x2c,
+	  sdr_op      = 0x2d,
+	  swr_op      = 0x2e,
+	  cache_op    = 0x2f,
+	  ll_op       = 0x30,
+	  lwc1_op     = 0x31,
+	  lld_op      = 0x34,
+	  ldc1_op     = 0x35,
+	  ld_op       = 0x37,
+	  sc_op       = 0x38,
+	  swc1_op     = 0x39,
+	  scd_op      = 0x3c,
+	  sdc1_op     = 0x3d,
+	  sd_op       = 0x3f
+  };
+	
+	static	const char *ops_name[];
+
+	//special family, the opcode is in low 6 bits. 
+	enum special_ops {
+		sll_op			= 0x00,
+		srl_op			= 0x02,
+		sra_op			= 0x03,
+		sllv_op			= 0x04,
+		srlv_op			= 0x06,
+		srav_op 		= 0x07,
+		jr_op				= 0x08,
+		jalr_op			= 0x09,
+		syscall_op	= 0x0c,
+		break_op		= 0x0d,
+		sync_op			= 0x0f,
+		mfhi_op			= 0x10,
+		mthi_op			= 0x11,
+		mflo_op			= 0x12,
+		mtlo_op			= 0x13,
+		dsllv_op		= 0x14,
+		dsrlv_op		= 0x16,
+		dsrav_op		= 0x17,
+		mult_op			= 0x18,
+		multu_op 		= 0x19,
+		div_op			= 0x1a,
+		divu_op			= 0x1b,
+		dmult_op		= 0x1c,
+		dmultu_op		= 0x1d,
+		ddiv_op			= 0x1e,
+		ddivu_op		= 0x1f,
+		add_op			= 0x20,
+		addu_op			= 0x21,
+		sub_op			= 0x22,
+		subu_op			= 0x23,
+		and_op			= 0x24,
+		or_op				= 0x25,
+		xor_op			= 0x26,
+		nor_op			= 0x27,
+		slt_op			= 0x2a,
+		sltu_op			= 0x2b,
+		dadd_op			= 0x2c,
+		daddu_op		= 0x2d,
+		dsub_op			= 0x2e,
+		dsubu_op		= 0x2f,
+		tge_op			= 0x30,
+		tgeu_op			= 0x31,
+		tlt_op			= 0x32,
+		tltu_op			= 0x33,
+		teq_op			= 0x34,
+		tne_op			= 0x36,
+		dsll_op			= 0x38,
+		dsrl_op			= 0x3a,
+		dsra_op			= 0x3b,
+		dsll32_op		= 0x3c,
+		dsrl32_op		= 0x3e,
+		dsra32_op		= 0x3f
+	};
+	
+	static	const char* special_name[]; 
+	
+	//regimm family, the opcode is in rt[16...20], 5 bits
+	enum regimm_ops {
+		bltz_op			= 0x00,
+		bgez_op			= 0x01,
+		bltzl_op		= 0x02,
+		bgezl_op		= 0x03,
+		tgei_op			= 0x08,
+		tgeiu_op		= 0x09,
+		tlti_op			= 0x0a,
+		tltiu_op		= 0x0b,
+		teqi_op			= 0x0c,
+		tnei_op			= 0x0e,
+		bltzal_op		= 0x10,
+		bgezal_op		= 0x11,
+		bltzall_op	= 0x12,
+		bgezall_op	= 0x13,
+	};
+
+	static	const char* regimm_name[]; 
+
+	//copx family, the op is in rs, 5 bits
+	enum cop_ops {
+		mf_op				= 0x00,
+		dmf_op			= 0x01,
+		cf_op				= 0x02,
+		mt_op				= 0x04,
+		dmt_op			= 0x05,
+		ct_op				= 0x06,
+		bc_op				= 0x08,
+		single_fmt	= 0x10,
+		double_fmt	= 0x11,
+		word_fmt		= 0x14,
+		long_fmt		= 0x15
+	};
+
+	enum bc_ops {
+		bcf_op			= 0x00,
+		bct_op			= 0x01,
+		bcfl_op			= 0x02,
+		bctl_op			= 0x03,
+	};
+
+	enum c_conds {
+		f_cond			= 0x30,
+		un_cond			= 0x31,
+		eq_cond			= 0x32,
+		ueq_cond		= 0x33,
+		olt_cond		= 0x34,
+		ult_cond		= 0x35,
+		ole_cond		= 0x36,
+		ule_cond		= 0x37,
+		sf_cond			= 0x38,
+		ngle_cond		= 0x39,
+		seq_cond		= 0x3a,
+		ngl_cond		= 0x3b,
+		lt_cond			= 0x3c,
+		nge_cond		= 0x3d,
+		le_cond			= 0x3e,
+		ngt_cond		= 0x3f
+	};
+
+	//low 6 bits of cp1 instruction
+	enum float_ops {
+		fadd_op			= 0x00,
+		fsub_op			= 0x01,
+		fmul_op			= 0x02,
+		fdiv_op			= 0x03,
+		fsqrt_op		= 0x04,
+		fabs_op			= 0x05,
+		fmov_op			= 0x06,
+		fneg_op			= 0x07,
+		froundl_op	= 0x08,
+		ftruncl_op	= 0x09,
+		fceill_op		= 0x0a,
+		ffloorl_op	= 0x0b,
+		froundw_op 	= 0x0c,
+		ftruncw_op	= 0x0d,
+		fceilw_op 	= 0x0e,
+		ffloorw_op	= 0x0f,
+		fcvts_op		= 0x20,
+		fcvtd_op		= 0x21,
+		fcvtw_op		= 0x24,
+		fcvtl_op		= 0x25,
+	};
+	
+	static const char* float_name[]; 
+
+	static int opcode(int insn) { return (insn>>26)&0x3f; }
+	static int rs(int insn) { return (insn>>21)&0x1f; }
+	static int rt(int insn) { return (insn>>16)&0x1f; }
+	static int rd(int insn) { return (insn>>11)&0x1f; }
+	static int sa(int insn) { return (insn>>6)&0x1f; }
+	static int special(int insn) { return insn&0x3f; }
+	static int imm_off(int insn) { return (short)low16(insn); }
+
+	static int low  (int x, int l) { return bitfield(x, 0, l); }
+	static int low16(int x)        { return low(x, 16); }
+	static int low26(int x)        { return low(x, 26); }
+	
+protected:
+	//helper methods for instruction emission
+
+	//I-Type (Immediate)
+	// 31      26 25      21 20      16 15                              0
+	//|  opcode  |    rs    |    rt    |           immediate            |
+	//|          |          |          |                                |
+	//      6          5          5                    16
+	static int insn_ORRI(int op, int rs, int rt, int imm) { return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } 
+
+	//R-Type (Register)
+	// 31         26 25      21 20      16 15      11 10        6 5        0
+	//|  special    |    rs    |    rt    |    rd    |     0     |  opcode  |
+	//| 0 0 0 0 0 0 |          |          |          | 0 0 0 0 0 |          |
+	//        6          5          5          5           5          6
+	static int insn_RRRO(int rs, int rt, int rd,   int op) { return (rs<<21) | (rt<<16) | (rd<<11)  | op; }
+	static int insn_RRSO(int rt, int rd, int sa,   int op) { return (rt<<16) | (rd<<11) | (sa<<6)   | op; }
+	static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; }
+	
+	static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); }
+	static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); }
+
+	static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { 
+		return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func;
+	}
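+	// Worked example (illustrative only): insn_ORRI(addiu_op, 4, 5, 8) is
+	// (0x09<<26)|(4<<21)|(5<<16)|0x0008 == 0x24850008, i.e. "addiu $5, $4, 8";
+	// opcode(), rs(), rt() and imm_off() above recover 0x09, 4, 5 and 8 from it.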
+	
+
+	//static int low  (int x, int l) { return bitfield(x, 0, l); }
+	//static int low16(int x)        { return low(x, 16); }
+	//static int low26(int x)        { return low(x, 26); }
+	
+	static int high  (int x, int l) { return bitfield(x, 32-l, l); }
+	static int high16(int x)        { return high(x, 16); }
+	static int high6 (int x)        { return high(x, 6); }
+
+	//get the offset field of jump/branch instruction
+	int offset(address entry) { 
+		assert(is_simm16((entry - _code_pos - 4) / 4), "change this code");
+		return (entry - _code_pos - 4) / 4; 
+	}
+	
+
+public:
+	using AbstractAssembler::offset;
+
+	//sign-extend x, where h is the index of the sign bit
+	static int expand(int x, int h) { return -(x & (1<<h)) | x;	}
+
+	// MIPS addiu (like load/store offsets) sign-extends its 16-bit immediate, so to
+	// build a full 32-bit offset/immediate with lui you have to use the following three helpers
+	// by yjl 6/22/2005
+	static int split_low(int x) {
+		return (x & 0xffff);
+	}
+
+	static int split_high(int x) {
+		return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff;
+	}
+
+	static int merge(int low, int high) {
+		return expand(low, 15) + (high<<16);
+	}
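+	// Worked example (illustrative only): to materialize 0x12348765,
+	// split_high() yields 0x1235 (the +1 compensates for the sign-extension of
+	// the low half) and split_low() yields 0x8765, so
+	//   lui  (reg, 0x1235);        // reg = 0x12350000
+	//   addiu(reg, reg, 0x8765);   // reg += 0xffff8765  ->  0x12348765
+	// and merge(0x8765, 0x1235) recomputes the original 0x12348765.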
+
+	// modified by spark 2005/08/18
+	static bool is_simm  (int x, int nbits) { return -(1 << (nbits-1)) <= x  &&  x < (1 << (nbits-1)); }
+	static bool is_simm16(int x)            { return is_simm(x, 16); }
+	
+	// test if imm can be coded in an instruction with a 16-bit imm/off
+	// by yjl 6/23/2005
+	/*static bool fit_in_insn(int imm) {
+		return imm == (short)imm;
+	}*/
+
+	static bool fit_in_jal(int offset) {
+		return is_simm(offset, 26);
+	}
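+	// For instance (illustrative only): is_simm16(32767) and is_simm16(-32768)
+	// hold while is_simm16(32768) does not; fit_in_jal() applies the analogous
+	// signed 26-bit range check to the word offset passed to j/jal.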
+	
+	
+	// test if entry can be encoded in the offset field of j/jal;
+	// must be used just before you emit j/jal
+	// by yjl 6/27/2005
+	bool fit_int_jal(address entry) {
+		return fit_in_jal(offset(entry));
+	}
+	
+	bool fit_int_branch(address entry) {
+		return is_simm16(offset(entry));
+	}
+
+protected:
+#ifdef ASSERT
+	  #define CHECK_DELAY
+#endif
+#ifdef CHECK_DELAY
+	enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state;
+#endif
+		
+public:
+	void assert_not_delayed() {
+#ifdef CHECK_DELAY
+		assert_not_delayed("next instruction should not be a delay slot");
+#endif
+	}
+
+	void assert_not_delayed(const char* msg) {
+#ifdef CHECK_DELAY
+		//guarantee( delay_state == no_delay, msg );
+		assert_msg ( delay_state == no_delay, msg);
+#endif		
+	}
+
+protected:
+	// Delay slot helpers
+	// cti is called when emitting a control-transfer instruction,
+	// BEFORE doing the emitting.
+	// Only effective when assertion-checking is enabled.
+	
+	// called when emitting cti with a delay slot, AFTER emitting
+	void has_delay_slot() {
+#ifdef CHECK_DELAY
+		assert_not_delayed("just checking");
+		delay_state = at_delay_slot;
+#endif
+	}
+
+public:
+	Assembler* delayed() {
+#ifdef CHECK_DELAY
+		guarantee( delay_state == at_delay_slot, "delayed instruction is not in delay slot");
+		delay_state = filling_delay_slot;
+#endif
+		return this;						
+	}
+
+	void flush() {
+#ifdef CHECK_DELAY
+		guarantee( delay_state == no_delay, "ending code with a delay slot");
+#endif
+		AbstractAssembler::flush();
+	}
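+	// Typical usage sketch (assumed, not taken from the original port; T0 and
+	// L_done are hypothetical names):
+	//   __ beq(T0, ZERO, L_done);   // beq() records the pending delay slot
+	//   __ delayed()->nop();        // delayed() checks the state, nop() fills
+	//                               // the branch delay slot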
+	
+	inline void emit_long(int);  // shadows AbstractAssembler::emit_long
+	inline void emit_data(int x) { emit_long(x); }
+	inline void emit_data(int, RelocationHolder const&);
+	inline void emit_data(int, relocInfo::relocType rtype);		
+	inline void check_delay();
+ 
+
+  // Generic instructions
+  // Does 32bit or 64bit as needed for the platform. In some sense these
+  // belong in macro assembler but there is no need for both varieties to exist
+
+	void add(Register rd, Register rs, Register rt)  { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, add_op)); }
+	void addi(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(addi_op, (int)rs, (int)rt, imm)); }
+	void addiu(Register rt, Register rs, int imm)    { emit_long(insn_ORRI(addiu_op, (int)rs, (int)rt, imm)); }
+	void addu(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, addu_op)); }
+	void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, and_op)); }
+	void andi(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(andi_op, (int)rs, (int)rt, imm)); }
+	
+	void beq    (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs, (int)rt, off)); has_delay_slot(); }
+	void beql   (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs, (int)rt, off)); has_delay_slot(); }
+	void bgez   (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bgez_op, off)); has_delay_slot(); }
+	void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bgezal_op, off)); has_delay_slot(); }
+	void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bgezall_op, off)); has_delay_slot(); }
+	void bgezl  (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bgezl_op, off)); has_delay_slot(); }
+	void bgtz   (Register rs, int off) { emit_long(insn_ORRI(bgtz_op,   (int)rs, 0, off)); has_delay_slot(); }
+	void bgtzl  (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op,  (int)rs, 0, off)); has_delay_slot(); }
+	void blez   (Register rs, int off) { emit_long(insn_ORRI(blez_op,   (int)rs, 0, off)); has_delay_slot(); }
+	void blezl  (Register rs, int off) { emit_long(insn_ORRI(blezl_op,  (int)rs, 0, off)); has_delay_slot(); }
+	void bltz   (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bltz_op, off)); has_delay_slot(); }
+	void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bltzal_op, off)); has_delay_slot(); }
+	void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bltzall_op, off)); has_delay_slot(); }
+	void bltzl  (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs, bltzl_op, off)); has_delay_slot(); }
+	void bne    (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op,  (int)rs, (int)rt, off)); has_delay_slot(); }
+	void bnel   (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs, (int)rt, off)); has_delay_slot(); }
+	void brk    (int code) { emit_long(break_op | (code<<16)); }
+	
+	void beq    (Register rs, Register rt, address entry) { beq(rs, rt, offset(entry)); }
+	void beql   (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));}
+	void bgez   (Register rs, address entry) { bgez   (rs, offset(entry)); }
+	void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); }
+	void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); }
+	void bgezl  (Register rs, address entry) { bgezl  (rs, offset(entry)); }
+	void bgtz   (Register rs, address entry) { bgtz   (rs, offset(entry)); }
+	void bgtzl  (Register rs, address entry) { bgtzl  (rs, offset(entry)); }
+	void blez   (Register rs, address entry) { blez   (rs, offset(entry)); }
+	void blezl  (Register rs, address entry) { blezl  (rs, offset(entry)); }
+	void bltz   (Register rs, address entry) { bltz   (rs, offset(entry)); }
+	void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); }
+	void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); }
+	void bltzl  (Register rs, address entry) { bltzl  (rs, offset(entry)); }
+	void bne    (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); }
+	void bnel   (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); }
+	
+	void beq    (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); }
+	void beql   (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); }
+	void bgez   (Register rs, Label& L){ bgez   (rs, target(L)); }
+	void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); }
+	void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); }
+	void bgezl  (Register rs, Label& L){ bgezl  (rs, target(L)); }
+	void bgtz   (Register rs, Label& L){ bgtz   (rs, target(L)); }
+	void bgtzl  (Register rs, Label& L){ bgtzl  (rs, target(L)); }
+	void blez   (Register rs, Label& L){ blez   (rs, target(L)); }
+	void blezl  (Register rs, Label& L){ blezl  (rs, target(L)); }
+	void bltz   (Register rs, Label& L){ bltz   (rs, target(L)); }
+	void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); }
+	void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); }
+	void bltzl  (Register rs, Label& L){ bltzl  (rs, target(L)); }
+	void bne    (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); }
+	void bnel   (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); }
+
+	void dadd  (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, dadd_op)); }
+	void daddi (Register rt, Register rs, int imm)     { emit_long(insn_ORRI(daddi_op,  (int)rs, (int)rt, imm)); }
+	void daddiu(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(daddiu_op, (int)rs, (int)rt, imm)); }
+	void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, daddu_op)); }
+	void ddiv  (Register rs, Register rt)              { emit_long(insn_RRRO((int)rs, (int)rt, 0, ddiv_op));	}
+	void ddivu (Register rs, Register rt)              { emit_long(insn_RRRO((int)rs, (int)rt, 0, ddivu_op)); }
+	void div   (Register rs, Register rt)              { emit_long(insn_RRRO((int)rs, (int)rt, 0, div_op)); }
+	void divu  (Register rs, Register rt)              { emit_long(insn_RRRO((int)rs, (int)rt, 0, divu_op)); }
+	void dmfc0 (Register rt, FloatRegister rd)         { emit_long(insn_COP0(dmf_op, (int)rt, (int)rd)); }
+	void dmtc0 (Register rt, FloatRegister rd)         { emit_long(insn_COP0(dmt_op, (int)rt, (int)rd)); }
+	void dmult (Register rs, Register rt)              { emit_long(insn_RRRO((int)rs, (int)rt, 0, dmult_op)); }
+	void dmultu(Register rs, Register rt)              { emit_long(insn_RRRO((int)rs, (int)rt, 0, dmultu_op)); }
+	void dsll  (Register rd, Register rt , int sa)     { emit_long(insn_RRSO((int)rt, (int)rd, sa, dsll_op)); }
+	void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, dsllv_op)); }	
+	void dsll32(Register rd, Register rt , int sa)     { emit_long(insn_RRSO((int)rt, (int)rd, sa, dsll32_op)); }
+	void dsra  (Register rd, Register rt , int sa)     { emit_long(insn_RRSO((int)rt, (int)rd, sa, dsra_op)); }
+	void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, dsrav_op)); }	
+	void dsra32(Register rd, Register rt , int sa)     { emit_long(insn_RRSO((int)rt, (int)rd, sa, dsra32_op)); }
+	void dsrl  (Register rd, Register rt , int sa)     { emit_long(insn_RRSO((int)rt, (int)rd, sa, dsrl_op)); }
+	void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, dsrlv_op)); }
+	void dsrl32(Register rd, Register rt , int sa)     { emit_long(insn_RRSO((int)rt, (int)rd, sa, dsrl32_op)); }
+	void dsub  (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, dsub_op)); }
+	void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, dsubu_op)); }
+
+	void b(int off)       { beq(ZERO, ZERO, off); }
+	void b(address entry) { b(offset(entry)); }
+	void b(Label& L)      { b(target(L)); }
+	
+	void j(address entry);
+	void jal(address entry);
+	
+	void jalr(Register rd, Register rs) { emit_long( ((int)rs<<21) | ((int)rd<<11) | jalr_op); has_delay_slot(); }
+	void jalr(Register rs)              { jalr(RA, rs); }
+	void jalr()                         { jalr(T9); }
+
+	void jr(Register rs) { emit_long(((int)rs<<21) | jr_op); has_delay_slot(); }
+
+	void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op,  (int)base, (int)rt, off)); }
+	void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base, (int)rt, off)); }
+	void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op,  (int)base, (int)rt, off)); }
+	void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base, (int)rt, off)); }
+	void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base, (int)rt, off)); }
+	void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op,  (int)base, (int)rt, off)); }
+	void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base, (int)rt, off)); }
+	void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op,  (int)base, (int)rt, off)); }
+	void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base, (int)rt, off)); }
+	void lui(Register rt, int imm)                { emit_long(insn_ORRI(lui_op, 0, (int)rt, imm)); }
+	void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op,  (int)base, (int)rt, off)); }
+	void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base, (int)rt, off)); }
+	void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base, (int)rt, off)); }
+	void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base, (int)rt, off)); }
+
+	void lb (Register rt, Address src);
+	void lbu(Register rt, Address src);
+	void ld (Register rt, Address src);
+	void ldl(Register rt, Address src);
+	void ldr(Register rt, Address src);
+	void lh (Register rt, Address src);
+	void lhu(Register rt, Address src);
+	void ll (Register rt, Address src);
+	void lld(Register rt, Address src);
+	void lw (Register rt, Address src);
+	void lwl(Register rt, Address src);
+	void lwr(Register rt, Address src);
+	void lwu(Register rt, Address src);
+	void lea(Register rt, Address src);
+	
+	void mfc0 (Register rt, Register rd) { emit_long(insn_COP0(mf_op, (int)rt, (int)rd)); }
+	void mfhi (Register rd)              { emit_long( ((int)rd<<11) | mfhi_op ); }	
+	void mflo (Register rd)              { emit_long( ((int)rd<<11) | mflo_op ); }	
+	void mtc0 (Register rt, Register rd) { emit_long(insn_COP0(mt_op, (int)rt, (int)rd)); }
+	void mthi (Register rs)              { emit_long( ((int)rs<<21) | mthi_op ); }	
+	void mtlo (Register rs)              { emit_long( ((int)rs<<21) | mtlo_op ); }	
+	void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, 0, mult_op)); }
+	void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, 0, multu_op)); }
+	
+	void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, nor_op)); }
+	
+	void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, or_op)); }
+	void ori(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(ori_op, (int)rs, (int)rt, imm)); }
+
+	void sb   (Register rt, Register base, int off)     { emit_long(insn_ORRI(sb_op,    (int)base, (int)rt, off)); }
+	void sc   (Register rt, Register base, int off)     { emit_long(insn_ORRI(sc_op,    (int)base, (int)rt, off)); }
+	void scd  (Register rt, Register base, int off)     { emit_long(insn_ORRI(scd_op,   (int)base, (int)rt, off)); }
+	void sd   (Register rt, Register base, int off)     { emit_long(insn_ORRI(sd_op,    (int)base, (int)rt, off)); }
+	void sdl  (Register rt, Register base, int off)     { emit_long(insn_ORRI(sdl_op,   (int)base, (int)rt, off)); }
+	void sdr  (Register rt, Register base, int off)     { emit_long(insn_ORRI(sdr_op,   (int)base, (int)rt, off)); }
+	void sh   (Register rt, Register base, int off)     { emit_long(insn_ORRI(sh_op,    (int)base, (int)rt, off)); }
+	void sll  (Register rd, Register rt ,  int sa)      { emit_long(insn_RRSO((int)rt,  (int)rd,   sa,      sll_op)); }
+	void sllv (Register rd, Register rt,   Register rs) { emit_long(insn_RRRO((int)rs,  (int)rt,   (int)rd, sllv_op)); }	
+	void slt  (Register rd, Register rs,   Register rt) { emit_long(insn_RRRO((int)rs,  (int)rt,   (int)rd, slt_op)); }	
+	void slti (Register rt, Register rs,   int imm)     { emit_long(insn_ORRI(slti_op,  (int)rs,   (int)rt, imm)); }
+	void sltiu(Register rt, Register rs,   int imm)     { emit_long(insn_ORRI(sltiu_op, (int)rs,   (int)rt, imm)); }
+	void sltu (Register rd, Register rs,   Register rt) { emit_long(insn_RRRO((int)rs,  (int)rt,   (int)rd, sltu_op)); }	
+	void sra  (Register rd, Register rt ,  int sa)      { emit_long(insn_RRSO((int)rt,  (int)rd,   sa,      sra_op)); }
+	void srav (Register rd, Register rt,   Register rs) { emit_long(insn_RRRO((int)rs,  (int)rt,   (int)rd, srav_op)); }	
+	void srl  (Register rd, Register rt ,  int sa)      { emit_long(insn_RRSO((int)rt,  (int)rd,   sa,      srl_op)); }
+	void srlv (Register rd, Register rt,   Register rs) { emit_long(insn_RRRO((int)rs,  (int)rt,   (int)rd, srlv_op)); }	
+	void sub  (Register rd, Register rs,   Register rt) { emit_long(insn_RRRO((int)rs,  (int)rt,   (int)rd, sub_op)); }
+	void subu (Register rd, Register rs,   Register rt) { emit_long(insn_RRRO((int)rs,  (int)rt,   (int)rd, subu_op)); }
+	void sw   (Register rt, Register base, int off)     { emit_long(insn_ORRI(sw_op,    (int)base, (int)rt, off)); }
+	void swl  (Register rt, Register base, int off)     { emit_long(insn_ORRI(swl_op,   (int)base, (int)rt, off)); }
+	void swr  (Register rt, Register base, int off)     { emit_long(insn_ORRI(swr_op,   (int)base, (int)rt, off)); }
+	void sync ()                                        { emit_long(sync_op); }
+	void syscall(int code)                              { emit_long( (code<<6) | syscall_op ); }
+
+	void sb(Register rt, Address dst);
+	void sc(Register rt, Address dst);
+	void scd(Register rt, Address dst);
+	void sd(Register rt, Address dst);
+	void sdl(Register rt, Address dst);
+	void sdr(Register rt, Address dst);
+	void sh(Register rt, Address dst);
+	void sw(Register rt, Address dst);
+	void swl(Register rt, Address dst);
+	void swr(Register rt, Address dst);
+	
+	void teq  (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs,   int(rt), code, teq_op)); }
+	void teqi (Register rs, int imm)               { emit_long(insn_ORRI(regimm_op, (int)rs, teqi_op, imm)); }
+	void tge  (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs,   int(rt), code, tge_op)); }
+	void tgei (Register rs, int imm)               { emit_long(insn_ORRI(regimm_op, (int)rs, tgei_op, imm)); }
+	void tgeiu(Register rs, int imm)               { emit_long(insn_ORRI(regimm_op, (int)rs, tgeiu_op, imm)); }
+	void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs,   int(rt), code, tgeu_op)); }
+	void tlt  (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs,   int(rt), code, tlt_op)); }
+	void tlti (Register rs, int imm)               { emit_long(insn_ORRI(regimm_op, (int)rs, tlti_op, imm)); }
+	void tltiu(Register rs, int imm)               { emit_long(insn_ORRI(regimm_op, (int)rs, tltiu_op, imm)); }
+	void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs,   int(rt), code, tltu_op)); }
+	void tne  (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs,   int(rt), code, tne_op)); }
+	void tnei (Register rs, int imm)               { emit_long(insn_ORRI(regimm_op, (int)rs, tnei_op, imm)); }
+	
+	void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs, (int)rt, (int)rd, xor_op)); }
+	void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs, (int)rt, imm)); }
+
+	void nop() { sll(ZERO, ZERO, 0); }
+	//float instructions for mips
+	void abs_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fabs_op));}
+	void abs_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fabs_op));}
+	void add_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, (int)fd, fadd_op));}
+	void add_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, (int)fd, fadd_op));}
+	
+	void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc_op, bcf_op, off)); has_delay_slot(); }
+	void bc1fl(int off) {	emit_long(insn_ORRI(cop1_op, bc_op, bcfl_op, off)); has_delay_slot(); }
+	void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc_op, bct_op, off)); has_delay_slot(); }
+	void bc1tl(int off) {	emit_long(insn_ORRI(cop1_op, bc_op, bctl_op, off));	has_delay_slot(); }
+
+	void bc1f (address entry) { bc1f(offset(entry)); }
+	void bc1fl(address entry) {	bc1fl(offset(entry)); }
+	void bc1t (address entry) { bc1t(offset(entry)); }
+	void bc1tl(address entry) {	bc1tl(offset(entry)); }
+	
+	void bc1f (Label& L) { bc1f(target(L)); }
+	void bc1fl(Label& L) {	bc1fl(target(L)); }
+	void bc1t (Label& L) { bc1t(target(L)); }
+	void bc1tl(Label& L) {	bc1tl(target(L)); }
+	
+	void c_f_s   (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, f_cond)); }
+	void c_f_d   (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, f_cond)); }
+	void c_un_s  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, un_cond)); }
+	void c_un_d  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, un_cond)); }
+	void c_eq_s  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, eq_cond)); }
+	void c_eq_d  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, eq_cond)); }
+	void c_ueq_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, ueq_cond)); }
+	void c_ueq_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, ueq_cond)); }
+	void c_olt_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, olt_cond)); }
+	void c_olt_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, olt_cond)); }
+	void c_ult_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, ult_cond)); }
+	void c_ult_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, ult_cond)); }
+	void c_ole_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, ole_cond)); }
+	void c_ole_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, ole_cond)); }
+	void c_ule_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, ule_cond)); }
+	void c_ule_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, ule_cond)); }
+	void c_sf_s  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, sf_cond)); }
+	void c_sf_d  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, sf_cond)); }
+	void c_ngle_s(FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, ngle_cond)); }
+	void c_ngle_d(FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, ngle_cond)); }
+	void c_seq_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, seq_cond)); }
+	void c_seq_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, seq_cond)); }
+	void c_ngl_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, ngl_cond)); }
+	void c_ngl_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, ngl_cond)); }
+	void c_lt_s  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, lt_cond)); }
+	void c_lt_d  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, lt_cond)); }
+	void c_nge_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, nge_cond)); }
+	void c_nge_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, nge_cond)); }
+	void c_le_s  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, le_cond)); }
+	void c_le_d  (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, le_cond)); }
+	void c_ngt_s (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, 0, ngt_cond)); }
+	void c_ngt_d (FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, 0, ngt_cond)); }
+	
+	void ceil_l_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fceill_op)); }
+	void ceil_l_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fceill_op)); }
+	void ceil_w_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fceilw_op)); }
+	void ceil_w_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fceilw_op)); }
+	void cfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(cf_op, (int)rt, (int)fs)); }
+	void ctc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(ct_op, (int)rt, (int)fs)); }
+
+	void cvt_d_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fcvtd_op)); }
+	void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs, (int)fd, fcvtd_op)); }
+	void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs, (int)fd, fcvtd_op)); }
+	void cvt_l_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fcvtl_op)); }
+	void cvt_l_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs, (int)fd, fcvtl_op)); }
+	void cvt_l_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fcvtl_op)); }
+	void cvt_s_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fcvts_op)); }
+	void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs, (int)fd, fcvts_op)); }
+	void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs, (int)fd, fcvts_op)); }
+	void cvt_w_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fcvtw_op)); }
+	void cvt_w_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fcvtw_op)); }
+	void cvt_w_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs, (int)fd, fcvtw_op)); }
+	
+	void div_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, (int)fd, fdiv_op)); }
+	void div_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, (int)fd, fdiv_op)); }
+	void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmf_op, (int)rt, (int)fs)); }
+	void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmt_op, (int)rt, (int)fs)); }
+
+	void floor_l_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, ffloorl_op)); }
+	void floor_l_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, ffloorl_op)); }
+	void floor_w_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, ffloorw_op)); }
+	void floor_w_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, ffloorw_op)); }
+	
+	void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, (int)base, (int)ft, off)); }
+	void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base, (int)ft, off)); }
+	void ldc1(FloatRegister ft, Address src);
+	void lwc1(FloatRegister ft, Address src);
+	
+	void mfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(mf_op, (int)rt, (int)fs)); }
+	void mov_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fmov_op)); }
+	void mov_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fmov_op)); }
+	void mtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(mt_op, (int)rt, (int)fs)); }
+	void mul_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, (int)fd, fmul_op)); }
+	void mul_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, (int)fd, fmul_op)); }
+
+	void neg_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fneg_op)); }
+	void neg_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fneg_op)); }
+	
+	void round_l_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, froundl_op)); }
+	void round_l_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, froundl_op)); }
+	void round_w_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, froundw_op)); }
+	void round_w_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, froundw_op)); }
+	
+	void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base, (int)ft, off)); }
+	void sdc1(FloatRegister ft, Address dst);
+	void sqrt_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, fsqrt_op)); }
+	void sqrt_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, fsqrt_op)); }
+	void sub_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(single_fmt, (int)ft, (int)fs, (int)fd, fsub_op)); }
+	void sub_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { emit_long(insn_F3RO(double_fmt, (int)ft, (int)fs, (int)fd, fsub_op)); }
+	void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base, (int)ft, off)); }
+	void swc1(FloatRegister ft, Address dst);
+
+	void trunc_l_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, ftruncl_op)); }
+	void trunc_l_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, ftruncl_op)); }
+	void trunc_w_s(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(single_fmt, 0, (int)fs, (int)fd, ftruncw_op)); }
+	void trunc_w_d(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(double_fmt, 0, (int)fs, (int)fd, ftruncw_op)); }
+  
+	void int3();
+	static void print_instruction(int);
+	int patched_branch(int dest_pos, int inst, int inst_pos);
+	int branch_destination(int inst, int pos);
+
+public:
+	// Creation
+	Assembler(CodeBuffer* code) : AbstractAssembler(code) {
+#ifdef CHECK_DELAY
+	  delay_state = no_delay;
+#endif
+	}
+
+};
+
+
+// MacroAssembler extends Assembler by frequently used macros.
+//
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+class MacroAssembler: public Assembler {
+  friend class LIR_Assembler;
+  friend class Runtime1;      // as_Address()
+
+ public:
+  static intptr_t i[32];
+  static float    f[32];
+  static void print(outputStream *s);
+
+  static int i_offset(unsigned int k);
+  static int f_offset(unsigned int k);
+
+  static void save_registers(MacroAssembler *masm);
+  static void restore_registers(MacroAssembler *masm);
+
+ protected:
+
+  Address as_Address(AddressLiteral adr);
+  Address as_Address(ArrayAddress adr);
+
+  // Support for VM calls
+  //
+  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+#ifdef CC_INTERP
+  // c++ interpreter never wants to use interp_masm version of call_VM
+  #define VIRTUAL
+#else
+  #define VIRTUAL virtual
+#endif
+
+  VIRTUAL void call_VM_leaf_base(
+    address entry_point,               // the entry point
+    int     number_of_arguments        // the number of arguments to pop after the call
+  );
+
+  // This is the base routine called by the different versions of call_VM. The interpreter
+  // may customize this version by overriding it for its purposes (e.g., to save/restore
+  // additional registers when doing a VM call).
+  //
+  // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
+  // returns the register which contains the thread upon return. If a thread register has been
+  // specified, the return value will correspond to that register. If no last_java_sp is specified
+  // (noreg) then rsp will be used instead.
+  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
+    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
+    Register java_thread,              // the thread if computed before     ; use noreg otherwise
+    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
+    address  entry_point,              // the entry point
+    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
+    bool     check_exceptions          // whether to check for pending exceptions after return
+  );
+
+  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+  // The implementation is only non-empty for the InterpreterMacroAssembler,
+  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
+
+  // helpers for FPU flag access
+  // tmp is a temporary register, if none is available use noreg
+  //void save_rax   (Register tmp);
+  //void restore_rax(Register tmp);
+
+ public:
+  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
+
+  // Support for NULL-checks
+  //
+  // Generates code that causes a NULL OS exception if the content of reg is NULL.
+  // If the accessed location is M[reg + offset] and the offset is known, provide the
+  // offset. No explicit code generation is needed if the offset is within a certain
+  // range (0 <= offset <= page_size).
+  // use "teq 83, reg" in mips now, by yjl 6/20/2005
+  void null_check(Register reg, int offset = -1);
+  static bool needs_explicit_null_check(intptr_t offset);
+
+  // Required platform-specific helpers for Label::patch_instructions.
+  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
+  void pd_patch_instruction(address branch, address target);
+#ifndef PRODUCT
+  static void pd_print_patched_instruction(address branch);
+#endif
+
+  // Alignment
+  void align(int modulus);
+
+  // Misc
+  //void fat_nop(); // 5 byte nop
+
+  // Stack frame creation/removal
+  void enter();
+  void leave();
+
+  // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
+  // The pointer will be loaded into the thread register.
+  void get_thread(Register thread);
+
+
+  // Support for VM calls
+  //
+  // It is imperative that all calls into the VM are handled via the call_VM macros.
+  // They make sure that the stack linkage is setup correctly. call_VM's correspond
+  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+
+
+  void call_VM(Register oop_result,
+               address entry_point,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  // Overloadings with last_Java_sp
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               int number_of_arguments = 0,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, bool
+               check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  void call_VM_leaf(address entry_point,
+                    int number_of_arguments = 0);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1, Register arg_2);
+  void call_VM_leaf(address entry_point,
+                    Register arg_1, Register arg_2, Register arg_3);
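+  // Illustrative usage (assumed, not from the original port; V0/A1 are assumed
+  // register aliases and some_entry is a hypothetical runtime entry):
+  //   __ call_VM(V0, CAST_FROM_FN_PTR(address, InterpreterRuntime::some_entry), A1);
+  // With the default check_exceptions = true the generated code also tests for
+  // a pending exception after the VM call returns.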
+
+  // last Java Frame (fills frame anchor)
+  void set_last_Java_frame(Register thread,
+                           Register last_java_sp,
+                           Register last_java_fp,
+                           address last_java_pc);
+
+  // thread in the default location (r15_thread on 64bit)
+  void set_last_Java_frame(Register last_java_sp,
+                           Register last_java_fp,
+                           address last_java_pc);
+
+  void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);
+
+  // thread in the default location (r15_thread on 64bit)
+  void reset_last_Java_frame(bool clear_fp, bool clear_pc);
+
+  // Stores
+  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
+  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
+
+/*
+  void g1_write_barrier_pre(Register obj,
+#ifndef _LP64
+                            Register thread,
+#endif
+                            Register tmp,
+                            Register tmp2,
+                            bool     tosca_live);
+  void g1_write_barrier_post(Register store_addr,
+                             Register new_val,
+#ifndef _LP64
+                             Register thread,
+#endif
+                             Register tmp,
+                             Register tmp2);
+
+*/
+
+  // split store_check(Register obj) to enhance instruction interleaving
+  void store_check_part_1(Register obj);
+  void store_check_part_2(Register obj);
+
+  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
+  void c2bool(Register x);
+/*
+  // C++ bool manipulation
+
+  void movbool(Register dst, Address src);
+  void movbool(Address dst, bool boolconst);
+  void movbool(Address dst, Register src);
+  void testbool(Register dst);
+
+  // oop manipulations
+  void load_klass(Register dst, Register src);
+  void store_klass(Register dst, Register src);
+
+  void load_prototype_header(Register dst, Register src);
+
+#ifdef _LP64
+  void store_klass_gap(Register dst, Register src);
+
+  void load_heap_oop(Register dst, Address src);
+  void store_heap_oop(Address dst, Register src);
+  void encode_heap_oop(Register r);
+  void decode_heap_oop(Register r);
+  void encode_heap_oop_not_null(Register r);
+  void decode_heap_oop_not_null(Register r);
+  void encode_heap_oop_not_null(Register dst, Register src);
+  void decode_heap_oop_not_null(Register dst, Register src);
+
+  void set_narrow_oop(Register dst, jobject obj);
+
+  // if heap base register is used - reinit it with the correct value
+  void reinit_heapbase();
+#endif // _LP64
+
+  // Int division/remainder for Java
+  // (as idivl, but checks for special case as described in JVM spec.)
+  // returns idivl instruction offset for implicit exception handling
+  int corrected_idivl(Register reg);
+
+  // Long division/remainder for Java
+  // (as idivq, but checks for special case as described in JVM spec.)
+  // returns idivq instruction offset for implicit exception handling
+  int corrected_idivq(Register reg);
+*/
+
+  void int3();
+/*
+  // Long operation macros for a 32bit cpu
+  // Long negation for Java
+  void lneg(Register hi, Register lo);
+
+  // Long multiplication for Java
+  // (destroys contents of eax, ebx, ecx and edx)
+  void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
+
+  // Long shifts for Java
+  // (semantics as described in JVM spec.)
+  void lshl(Register hi, Register lo);                               // hi:lo << (rcx & 0x3f)
+  void lshr(Register hi, Register lo, bool sign_extension = false);  // hi:lo >> (rcx & 0x3f)
+
+  // Long compare for Java
+  // (semantics as described in JVM spec.)
+  void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
+
+
+  // misc
+*/
+  // Sign extension
+  void sign_extend_short(Register reg) { shl(reg, 16); sar(reg, 16); }
+  void sign_extend_byte(Register reg)  { shl(reg, 24); sar(reg, 24); }
+  void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft);
+  void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft);
+
+  // Inlined sin/cos generator for Java; must not use CPU instruction
+  // directly on Intel as it does not have high enough precision
+  // outside of the range [-pi/4, pi/4]. Extra argument indicate the
+  // number of FPU stack slots in use; all but the topmost will
+  // require saving if a slow case is necessary. Assumes argument is
+  // on FP TOS; result is on FP TOS.  No cpu registers are changed by
+  // this code.
+  void trigfunc(char trig, int num_fpu_regs_in_use = 1);
+/*
+  // branch to L if FPU flag C2 is set/not set
+  // tmp is a temporary register, if none is available use noreg
+  void jC2 (Register tmp, Label& L);
+  void jnC2(Register tmp, Label& L);
+
+  // Pop ST (ffree & fincstp combined)
+  void fpop();
+
+  // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
+  void push_fTOS();
+
+  // pops double TOS element from CPU stack and pushes on FPU stack
+  void pop_fTOS();
+
+  void empty_FPU_stack();
+
+  void push_IU_state();
+  void pop_IU_state();
+
+  void push_FPU_state();
+  void pop_FPU_state();
+
+  void push_CPU_state();
+  void pop_CPU_state();
+
+  // Round up to a power of two
+  void round_to(Register reg, int modulus);
+
+  // Callee saved registers handling
+  void push_callee_saved_registers();
+  void pop_callee_saved_registers();
+*/
+  // allocation
+  void eden_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+  void tlab_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+    );
+  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
+
+  //----
+  //  void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
+
+
+  // Debugging
+
+  // only if +VerifyOops
+  void verify_oop(Register reg, const char* s = "broken oop");
+  void verify_oop_addr(Address addr, const char * s = "broken oop addr");
+  void verify_oop_subroutine();
+
+  // only if +VerifyFPU
+  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
+
+  // prints msg, dumps registers and stops execution
+  void stop(const char* msg);
+
+  // prints msg and continues
+  void warn(const char* msg);
+
+  static void debug(char* msg/*, RegistersForDebugging* regs*/);
+  static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
+  static void debug64(char* msg, int64_t pc, int64_t regs[]);
+
+  //void os_breakpoint();
+
+  void untested()                                { stop("untested"); }
+
+  void unimplemented(const char* what = "")      { char* b = new char[1024];  jio_snprintf(b, 1024, "unimplemented: %s", what);  stop(b); }
+
+  void should_not_reach_here()                   { stop("should not reach here"); }
+
+  void print_CPU_state();
+
+  // Stack overflow checking
+  void bang_stack_with_offset(int offset) {
+    // stack grows down, caller passes positive offset
+    assert(offset > 0, "must bang with negative offset");
+    //movl(Address(rsp, (-offset)), rax);
+    if (offset <= 32768) {
+      sw(A0, SP, -offset);
+    } else {
+      move(AT, offset);
+      sub(AT, SP, AT);
+      sw(A0, AT, 0);
+    }
+  }
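+  // Note on the 32768 threshold above: sw takes a 16-bit signed immediate
+  // (-32768..32767), so -offset can be encoded directly only when
+  // offset <= 32768; larger offsets go through AT.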
+
+  	// Writes to stack successive pages until offset reached to check for
+  	// stack overflow + shadow pages.  Also, clobbers tmp
+  	void bang_stack_size(Register size, Register tmp);
+
+  	// Support for serializing memory accesses between threads
+  	void serialize_memory(Register thread, Register tmp);
+
+  	//void verify_tlab();
+  	void verify_tlab(Register t1, Register t2);
+
+  	// Biased locking support
+  	// lock_reg and obj_reg must be loaded up with the appropriate values.
+  	// swap_reg must be rax, and is killed.
+  	// tmp_reg is optional. If it is supplied (i.e., != noreg) it will
+  	// be killed; if not supplied, push/pop will be used internally to
+  	// allocate a temporary (inefficient, avoid if possible).
+  	// Optional slow case is for implementations (interpreter and C1) which branch to
+  	// slow case directly. Leaves condition codes set for C2's Fast_Lock node.
+  	// Returns offset of first potentially-faulting instruction for null
+  	// check info (currently consumed only by C1). If
+  	// swap_reg_contains_mark is true then returns -1 as it is assumed
+  	// the calling code has already passed any potential faults.
+  	int biased_locking_enter(Register lock_reg, Register obj_reg,
+			  Register swap_reg, Register tmp_reg,
+			  bool swap_reg_contains_mark,
+			  Label& done, Label* slow_case = NULL,
+			  BiasedLockingCounters* counters = NULL);
+	void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
+
+
+  // Calls
+
+	void call(address entry);
+	void call(address entry, relocInfo::relocType rtype); 
+	void call(address entry, RelocationHolder& rh); 
+	void jmp(address entry);
+	void jmp(address entry, relocInfo::relocType rtype);
+
+	// Argument ops
+	inline void store_int_argument(Register s, Argument& a);
+	inline void store_long_argument(Register s, Argument& a);
+	inline void store_float_argument(FloatRegister s, Argument& a);
+	inline void store_double_argument(FloatRegister s, Argument& a);
+	inline void store_ptr_argument(Register s, Argument& a);
+
+	// ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs
+	// st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs
+	inline void ld_ptr(Register rt, Address a);
+	inline void ld_ptr(Register rt, Register base, int offset16);
+	inline void st_ptr(Register rt, Address a);
+	inline void st_ptr(Register rt, Register base, int offset16);
+
+	void ld_ptr(Register rt, Register offset, Register base);
+	void st_ptr(Register rt, Register offset, Register base);
+
+	// ld_long will perform lw for 32 bit VMs and ld for 64 bit VMs
+	// st_long will perform sw for 32 bit VMs and sd for 64 bit VMs
+	inline void ld_long(Register rt, Register base, int offset16);
+	inline void st_long(Register rt, Register base, int offset16);
+	inline void ld_long(Register rt, Address a);
+	inline void st_long(Register rt, Address a);
+
+
+	void ld_long(Register rt, Register offset, Register base);
+	void st_long(Register rt, Register offset, Register base);
+	// Regular vs. d* versions
+	inline void addu_long(Register rd, Register rs, Register rt);
+	inline void addu_long(Register rd, Register rs, long imm32_64);
+
+	// Floating
+ public:
+	// swap the two bytes of the low 16-bit halfword
+	// these routines use AT; make sure the high 16 bits of reg are zero
+	// by yjl 6/28/2005
+	void hswap(Register reg);
+  	void huswap(Register reg);
+
+	// convert big endian integer to little endian integer
+  // by yjl 6/29/2005
+  	void swap(Register reg);
+ 
+	// implements the x86 cmpxchg semantics:
+	//   if c_reg == *dest then *dest <= x_reg
+	//   else                    c_reg <= *dest
+	// AT indicates whether the exchange occurred: 1 if it did, 0 otherwise
+	// by yjl 6/28/2005
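+	// A rough C sketch of the word-sized behaviour described above (the emitted
+	// MIPS code is expected to use an ll/sc retry loop rather than a single
+	// atomic instruction):
+	//   if (*dest == c_reg) { *dest = x_reg; AT = 1; }
+	//   else                { c_reg = *dest; AT = 0; }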
+	void cmpxchg(Register x_reg, Address dest, Register c_reg);
+	void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi);
+
+	void neg(Register reg) { subu(reg, ZERO, reg); }
+
+	void extend_sign(Register rh, Register rl) { sra(rh, rl, 31); }
+	
+	void round_to(Register reg, int modulus) {
+		assert_different_registers(reg, AT);
+		increment(reg, modulus - 1);
+		move(AT, - modulus);
+		andr(reg, reg, AT);
+	}
+
+	//pop & push, added by aoqi
+	void push (Register reg)      { sw  (reg, SP, -4); addi(SP, SP, -4); }
+	void push (FloatRegister reg) { swc1(reg, SP, -4); addi(SP, SP, -4); }
+	void pop  (Register reg)      { lw  (reg, SP, 0);  addi(SP, SP, 4); }
+	void pop  (FloatRegister reg) { lwc1(reg, SP, 0);  addi(SP, SP, 4); }
+	void push2(Register reg1, Register reg2);
+	void pop2 (Register reg1, Register reg2);
+	void pop  ()                  { addi(SP, SP, 4); }
+	void pop2 ()                  { addi(SP, SP, 8); }
+
+	// move a 32-bit immediate into a register
+	void move(Register reg, int imm);
+	void move(Register rd, Register rs)   { add(rd, rs, ZERO); }
+
+  	void shl(Register reg, int sa)        { sll(reg, reg, sa); }
+  	void shr(Register reg, int sa)        { srl(reg, reg, sa); }
+  	void sar(Register reg, int sa)        { sra(reg, reg, sa); }
+
+	// the following two may use the AT register; make sure AT holds no meaningful data before calling them
+	// by yjl 6/23/2005
+	void increment(Register reg, int imm);
+	void decrement(Register reg, int imm);
+
+	// FIXME
+  	void empty_FPU_stack() { /* not yet implemented */ }
+
+// helpers to save and restore the general-purpose registers
+	void pushad();
+	void popad();
+
+	void  load_two_bytes_from_at_bcp(Register reg, Register tmp, int offset);
+	void  store_two_byts_to_at_bcp(Register reg, Register tmp, int offset);
+#undef VIRTUAL
+
+};
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which will be checked at run-time.
+ */
+class SkipIfEqual {
+ private:
+  MacroAssembler* _masm;
+  Label _label;
+
+ public:
+   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
+   ~SkipIfEqual();
+};
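+
+// A minimal usage sketch (SomeFlag is a placeholder name): the constructor emits
+// the run-time check and the destructor binds the skip label, so everything
+// emitted inside the scope is jumped over when *flag_addr == value.
+//
+//   {
+//     SkipIfEqual skip(masm, &SomeFlag, false);
+//     ... // assembly emitted here is skipped at run time when SomeFlag is false
+//   }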
+
+#ifdef ASSERT
+inline bool AbstractAssembler::pd_check_instruction_mark() { return true; }
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,186 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline void MacroAssembler::pd_patch_instruction(address branch, address target) {
+  jint& stub_inst = *(jint*) branch;
+  stub_inst = patched_branch(target - branch, stub_inst, 0);
+}
+
+#ifndef PRODUCT
+inline void MacroAssembler::pd_print_patched_instruction(address branch) {
+  jint stub_inst = *(jint*) branch;
+  print_instruction(stub_inst);
+  ::tty->print("%s", " (unresolved)");
+}
+#endif // PRODUCT
+
+//inline bool Address::is_simm13(int offset) { return Assembler::is_simm13(disp() + offset); }
+
+
+
+inline void Assembler::check_delay() {
+# ifdef CHECK_DELAY
+//  guarantee( delay_state != at_delay_slot, "must say delayed() when filling delay slot");
+  delay_state = no_delay;
+# endif
+}
+
+inline void Assembler::emit_long(int x) {
+  check_delay();
+  AbstractAssembler::emit_long(x);
+}
+
+inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
+  relocate(rtype);
+  emit_long(x);
+}
+
+inline void Assembler::emit_data(int x, RelocationHolder const& rspec) {
+  relocate(rspec);
+  emit_long(x);
+}
+
+inline void MacroAssembler::store_int_argument(Register s, Argument &a) {
+	if(a.is_Register()) {
+		move(a.as_Register(), s);
+	} else {
+		sw(s, a.as_caller_address());
+	}
+}
+
+inline void MacroAssembler::store_long_argument(Register s, Argument &a) {
+	Argument a1 = a.successor();
+	if(a.is_Register() && a1.is_Register()) {
+		move(a.as_Register(), s);
+		move(a.as_Register(), s);
+	} else {
+		sd(s, a.as_caller_address());
+	}
+}
+
+/*inline void MacroAssembler::store_float_argument(Register s, Argument &a) {
+	if(a.is_Register()) {
+		mov_s(a.as_FloatRegister(), s);
+	} else {
+		swc1(s, a.as_caller_address());
+	}
+}
+
+inline void MacroAssembler::store_double_argument(Register s, Argument &a) {
+	if(a.is_Register()) {
+		mov_d(a.as_FloatRegister(), s);
+	} else {
+		sdc1(s, a.as_caller_address());
+	}
+}*/
+
+inline void MacroAssembler::store_ptr_argument(Register s, Argument &a) {
+	if(a.is_Register()) {
+		move(a.as_Register(), s);
+	} else {
+
+		st_ptr(s, a.as_caller_address());
+	}
+}
+inline void MacroAssembler::ld_ptr(Register rt, Register base, int offset16) {
+#ifdef _LP64
+  ld(rt, base, offset16);
+#else
+  lw(rt, base, offset16);
+#endif
+}
+inline void MacroAssembler::ld_ptr(Register rt, Address a) {
+#ifdef _LP64
+  ld(rt, a.base(), a.disp());
+#else
+  lw(rt, a.base(), a.disp());
+#endif
+}
+
+inline void MacroAssembler::st_ptr(Register rt, Address a) {
+#ifdef _LP64
+  sd(rt, a.base(), a.disp());
+#else
+  sw(rt, a.base(), a.disp());
+#endif
+}
+
+inline void MacroAssembler::st_ptr(Register rt, Register base, int offset16) {
+#ifdef _LP64
+  sd(rt, base, offset16);
+#else
+  sw(rt, base, offset16);
+#endif
+}
+
+inline void MacroAssembler::ld_long(Register rt, Register base, int offset16) {
+#ifdef _LP64
+  ld(rt, base, offset16);
+#else
+  lw(rt, base, offset16);
+#endif
+}
+
+inline void MacroAssembler::st_long(Register rt, Register base, int offset16) {
+#ifdef _LP64
+  sd(rt, base, offset16);
+#else
+  sw(rt, base, offset16);
+#endif
+}
+
+inline void MacroAssembler::ld_long(Register rt, Address a) {
+#ifdef _LP64
+  ld(rt, a.base(), a.disp());
+#else
+  lw(rt, a.base(), a.disp());
+#endif
+}
+
+inline void MacroAssembler::st_long(Register rt, Address a) {
+#ifdef _LP64
+  sd(rt, a.base(), a.disp());
+#else
+  sw(rt, a.base(), a.disp());
+#endif
+}
+
+inline void MacroAssembler::addu_long(Register rd, Register rs, Register rt) {
+#ifdef _LP64
+  daddu(rd, rs, rt);
+#else
+  addu(rd, rs, rt);
+#endif
+}
+
+inline void MacroAssembler::addu_long(Register rd, Register rs, long imm32_64) {
+#ifdef _LP64
+  daddiu(rd, rs, imm32_64);
+#else
+  addiu(rd, rs, imm32_64);
+#endif
+}  
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_bytecodeInterpreter_mips.cpp.incl"
+
+#ifdef CC_INTERP
+
+#endif // CC_INTERP (all)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2002-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Platform specific for C++ based Interpreter
+#define LOTS_OF_REGS    /* Lets interpreter use plenty of registers */
+
+private:
+
+    // save the bottom of the stack after frame manager setup. For ease of restoration after return
+    // from recursive interpreter call
+    intptr_t*  _frame_bottom;             /* saved bottom of frame manager frame */
+    intptr_t* _last_Java_pc;              /* pc to return to in frame manager */
+    intptr_t* _sender_sp;                 /* sender's sp before stack (locals) extension */
+    interpreterState _self_link;          /*  Previous interpreter state  */ /* sometimes points to self??? */
+    double    _native_fresult;            /* save result of native calls that might return floats */
+    intptr_t  _native_lresult;            /* save result of native calls that might return handle/longs */
+public:
+
+    static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp);
+    inline intptr_t* sender_sp() {
+  	return _sender_sp;
+    }
+
+
+#define SET_LAST_JAVA_FRAME()
+
+#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0);
+
+/*
+ * Macros for accessing the stack.
+ */
+#undef STACK_INT
+#undef STACK_FLOAT
+#undef STACK_ADDR
+#undef STACK_OBJECT
+#undef STACK_DOUBLE
+#undef STACK_LONG
+// JavaStack Implementation
+
+
+#define GET_STACK_SLOT(offset)    (*((intptr_t*) &topOfStack[-(offset)]))
+#define STACK_SLOT(offset)    ((address) &topOfStack[-(offset)])
+#define STACK_ADDR(offset)    (*((address *) &topOfStack[-(offset)]))
+#define STACK_INT(offset)     (*((jint*) &topOfStack[-(offset)]))
+#define STACK_FLOAT(offset)   (*((jfloat *) &topOfStack[-(offset)]))
+#define STACK_OBJECT(offset)  (*((oop *) &topOfStack [-(offset)]))
+#define STACK_DOUBLE(offset)  (((VMJavaVal64*) &topOfStack[-(offset)])->d)
+#define STACK_LONG(offset)    (((VMJavaVal64 *) &topOfStack[-(offset)])->l)
+
+#define SET_STACK_SLOT(value, offset)   (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value))
+#define SET_STACK_ADDR(value, offset)   (*((address *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_INT(value, offset)    (*((jint *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_FLOAT(value, offset)  (*((jfloat *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value))
+#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d =  \
+                                                 ((VMJavaVal64*)(addr))->d)
+#define SET_STACK_LONG(value, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value))
+#define SET_STACK_LONG_FROM_ADDR(addr, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l =  \
+                                                 ((VMJavaVal64*)(addr))->l)
+
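+// For illustration, the interpreter loop combines the stack and locals macros in
+// patterns like the following (sketches of the common usage, not definitions
+// from this file):
+//
+//   SET_STACK_INT(VMintAdd(STACK_INT(-2), STACK_INT(-1)), -2);  // iadd
+//   SET_STACK_OBJECT(LOCALS_OBJECT(0), 0);                      // aload_0
+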
+#define LOCALS_SLOT(offset)    ((intptr_t*)&locals[-(offset)])
+#define LOCALS_ADDR(offset)    ((address)locals[-(offset)])
+#define LOCALS_INT(offset)     (*((jint*)&locals[-(offset)]))
+#define LOCALS_FLOAT(offset)   (*((jfloat*)&locals[-(offset)]))
+#define LOCALS_OBJECT(offset)  ((oop)locals[-(offset)])
+#define LOCALS_DOUBLE(offset)  (((VMJavaVal64*)&locals[-((offset) + 1)])->d)
+#define LOCALS_LONG(offset)    (((VMJavaVal64*)&locals[-((offset) + 1)])->l)
+#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
+#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)]))
+
+#define SET_LOCALS_SLOT(value, offset)    (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value))
+#define SET_LOCALS_ADDR(value, offset)    (*((address *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_INT(value, offset)     (*((jint *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_FLOAT(value, offset)   (*((jfloat *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_OBJECT(value, offset)  (*((oop *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_DOUBLE(value, offset)  (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
+#define SET_LOCALS_LONG(value, offset)    (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
+#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
+                                                  ((VMJavaVal64*)(addr))->d)
+#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \
+                                                ((VMJavaVal64*)(addr))->l)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2002 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Inline interpreter functions for MIPS
+
+inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; }
+inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; }
+inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; }
+inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; }
+inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); }
+
+inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; }
+
+inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+
+}
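+// When either operand is NaN the chain falls through to `direction`; callers are
+// expected to pass -1 for fcmpl and +1 for fcmpg, matching the JVM specification.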
+
+inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) {
+  // copy 64 bits as two 32-bit words; the source and destination may be unaligned
+  to[0] = from[0]; to[1] = from[1];
+}
+
+// The long operations depend on compiler support for "long long"
+
+inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) {
+  return op1 + op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) {
+  return op1 & op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) {
+  // QQQ what about check and throw...
+  return op1 / op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) {
+  return op1 * op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) {
+  return op1 | op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) {
+  return op1 - op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) {
+  return op1 ^ op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) {
+  return op1 % op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) {
+  // CVM did this 0x3f mask; is it really needed??? QQQ
+  return ((unsigned long long) op1) >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) {
+  return op1 >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) {
+  return op1 << (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongNeg(jlong op) {
+  return -op;
+}
+
+inline jlong BytecodeInterpreter::VMlongNot(jlong op) {
+  return ~op;
+}
+
+inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) {
+  return (op <= 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGez(jlong op) {
+  return (op >= 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) {
+  return (op == 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) {
+  return (op1 == op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) {
+  return (op1 != op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) {
+  return (op1 >= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) {
+  return (op1 <= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) {
+  return (op1 < op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) {
+  return (op1 > op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) {
+  return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0);
+}
+
+// Long conversions
+
+inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) {
+  return (jfloat) val;
+}
+
+inline jint BytecodeInterpreter::VMlong2Int(jlong val) {
+  return (jint) val;
+}
+
+// Double Arithmetic
+
+inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) {
+  return op1 + op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) {
+  // Divide by zero... QQQ
+  return op1 / op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) {
+  return op1 * op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) {
+  return -op;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) {
+  return fmod(op1, op2);
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) {
+  return op1 - op2;
+}
+
+inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+}
+
+// Double Conversions
+
+inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) {
+  return (jfloat) val;
+}
+
+// Float Conversions
+
+inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) {
+  return (jdouble) op;
+}
+
+// Integer Arithmetic
+
+inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) {
+  return op1 + op2;
+}
+
+inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) {
+  return op1 & op2;
+}
+
+inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if (op1 == 0x80000000 && op2 == -1) return op1;
+  else return op1 / op2;
+}
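+// Example: VMintDiv(0x80000000, -1) would overflow in hardware division; the JVM
+// spec requires the result to be the dividend (Integer.MIN_VALUE) in that case.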
+
+inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) {
+  return op1 * op2;
+}
+
+inline jint BytecodeInterpreter::VMintNeg(jint op) {
+  return -op;
+}
+
+inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) {
+  return op1 | op2;
+}
+
+inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if (op1 == 0x80000000 && op2 == -1) return 0;
+  else return op1 % op2;
+}
+
+inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) {
+  return op1 <<  op2;
+}
+
+inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) {
+  return op1 >>  op2; // QQ op2 & 0x1f??
+}
+
+inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) {
+  return op1 - op2;
+}
+
+inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
+  return ((juint) op1) >> op2; // QQ op2 & 0x1f??
+}
+
+inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) {
+  return op1 ^ op2;
+}
+
+inline jdouble BytecodeInterpreter::VMint2Double(jint val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMint2Float(jint val) {
+  return (jfloat) val;
+}
+
+inline jlong BytecodeInterpreter::VMint2Long(jint val) {
+  return (jlong) val;
+}
+
+inline jchar BytecodeInterpreter::VMint2Char(jint val) {
+  return (jchar) val;
+}
+
+inline jshort BytecodeInterpreter::VMint2Short(jint val) {
+  return (jshort) val;
+}
+
+inline jbyte BytecodeInterpreter::VMint2Byte(jint val) {
+  return (jbyte) val;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,38 @@
+/*
+ * Copyright 1998 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_bytecodes_mips.cpp.incl"
+
+
+void Bytecodes::pd_initialize() {
+  // No mips specific initialization
+}
+
+
+Bytecodes::Code Bytecodes::pd_base_code_for(Code code) {
+  // No mips specific bytecodes
+  return code;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,26 @@
+/*
+ * Copyright 1998 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// No Loongson specific bytecodes
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/bytes_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,175 @@
+/*
+ * Copyright 1997-2001 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Bytes: AllStatic {
+	private:
+		// Helper function for swap_u8; not used on Loongson.
+		static inline u8   swap_u8_base(u4 x, u4 y) { return 0; }  // compiler-dependent implementation
+
+	public:
+		// Returns true if the byte ordering used by Java is different from the native byte ordering
+		// of the underlying machine. For example, this is true for Intel x86, but false for Solaris
+		// on Sparc.
+		// this port targets mipsel (little-endian), so return true
+		static inline bool is_Java_byte_ordering_different(){ return true; }
+
+
+		// Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
+		// (MIPS cannot access unaligned data directly, so the unaligned cases are handled explicitly below)
+		static inline u2   get_native_u2(address p)         { 
+			if ((int)p & 0x1) {
+				return ((u2)p[1] << 8) | (u2)p[0];
+			} else {
+				return *(u2*)p;
+			}
+		}
+
+		static inline u4   get_native_u4(address p)         { 
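+			// The lwr/lwl pair below is the standard MIPS idiom for an unaligned
+			// 32-bit load: together they assemble the word from the two parts of
+			// the misaligned access (offsets 0 and 3, little-endian order here).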
+			if ((int)p&3) {
+				u4 res;
+				__asm__ __volatile__ (
+						" .set push\n"
+						" .set mips3\n"
+						" .set noreorder\n"
+
+						"		lwr %[res], 0(%[addr])		\n"
+						"		lwl	%[res], 3(%[addr])		\n"
+
+						" .set pop"
+						:	[res] "=&r" (res)
+						: [addr] "r" (p)
+						: "memory"
+						);
+				return res;
+			} else {
+				return *(u4*)p;
+			}
+		}
+
+		static inline u8   get_native_u8(address p)         { 
+			u8 res;
+			u8 temp;
+			//	u4 tp;//tmp register
+			__asm__ __volatile__ (
+					" .set push\n"
+					" .set mips3\n"
+					" .set noreorder\n"
+					" .set noat\n"
+					"		andi $1,%[addr],0x7		\n"
+					"		beqz $1,1f				\n"
+					"		nop				\n"
+					"		ldr %[temp], 0(%[addr])		\n"
+					"		ldl	%[temp], 7(%[addr])	\n"
+					"               b 2f				\n"
+					"		nop				\n"
+					"	1:\t	ld	%[temp],0(%[addr])	\n"
+					"	2:\t 	sd	%[temp], %[res]		\n"
+
+					" .set at\n"	
+					" .set pop\n"
+					:  [addr]"=r"(p), [temp]"=r" (temp)
+					:  "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res)
+					: "memory"
+					);
+
+			return res;
+		}
+		// stores: the u2 case is done byte-wise when unaligned; the unaligned u4/u8 variants are currently disabled
+		static inline void put_native_u2(address p, u2 x)   { 
+			if((int)p & 0x1) {
+				p[0] = (u_char)(x);
+				p[1] = (u_char)(x>>8);
+			} else {
+				*(u2*)p  = x; 
+			}
+		}
+		static inline void put_native_u4(address p, u4 x)   {
+			*(u4*)p = x;
+		/*	if ((int)p&3) {
+				__asm__ __volatile__ (
+						" .set push\n"
+						" .set mips3\n"
+						" .set noreorder\n"
+
+						"		swr %[x], 0(%[addr])		\n"
+						"		swl	%[x], 3(%[addr])		\n"
+
+						" .set pop"
+						:
+						: [addr] "r" (p), [x] "r" (x)
+						: "memory"
+						);
+			} else {
+				*(u4*)p = x;
+			}*/
+		 }
+		static inline void put_native_u8(address p, u8 x)   {
+			//	u4 tp;//tmp register
+			*(u8*)p = x;
+			/*if ((int)p&7) {
+			
+				__asm__ __volatile__ (
+						" .set push\n"
+						" .set mips3\n"
+						" .set noreorder\n"
+						" .set noat\n"
+						"		sdr %[x], 0(%[addr])		\n"
+						"		sdl	%[x], 7(%[addr])	\n"
+	
+						" .set at\n"	
+						" .set pop\n"
+						: 
+						:  [addr] "r" (p), [x]"r" (x)
+						: "memory"
+						);
+			} else {
+				
+				*(u8*)p = x;
+			}*/
+
+		}
+
+
+		// Efficient reading and writing of unaligned unsigned data in Java
+		// byte ordering (i.e. big-endian ordering). Byte-order reversal is
+		// needed since this port targets little-endian MIPS (mipsel).
+		static inline u2   get_Java_u2(address p)           { return swap_u2(get_native_u2(p)); }
+		static inline u4   get_Java_u4(address p)           { return swap_u4(get_native_u4(p)); }
+		static inline u8   get_Java_u8(address p)           { return swap_u8(get_native_u8(p)); }
+
+		static inline void put_Java_u2(address p, u2 x)     { put_native_u2(p, swap_u2(x)); }
+		static inline void put_Java_u4(address p, u4 x)     { put_native_u4(p, swap_u4(x)); }
+		static inline void put_Java_u8(address p, u8 x)     { put_native_u8(p, swap_u8(x)); }
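+		// Worked example on little-endian MIPS: for bytes { 0x12, 0x34 } at p,
+		// get_native_u2(p) yields 0x3412 while get_Java_u2(p) yields 0x1234.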
+
+
+		// Efficient swapping of byte ordering
+		static inline u2   swap_u2(u2 x);                   // compiler-dependent implementation
+		static inline u4   swap_u4(u4 x);                   // compiler-dependent implementation
+		static inline u8   swap_u8(u8 x);
+};
+
+
+// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base]
+#include "incls/_bytes_pd.inline.hpp.incl"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_CodeStubs_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,592 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_c1_CodeStubs_mips.cpp.incl"
+
+
+#define __ ce->masm()->
+
+float ConversionStub::float_zero = 0.0;
+double ConversionStub::double_zero = 0.0;
+
+void ConversionStub::emit_code(LIR_Assembler* ce) {
+	/*
+	   __ bind(_entry);
+	   assert(bytecode() == Bytecodes::_f2i || bytecode() == Bytecodes::_d2i, "other conversions do not require stub");
+
+
+	   if (input()->is_single_xmm()) {
+	   __ comiss(input()->as_xmm_float_reg(),
+	   ExternalAddress((address)&float_zero));
+	   } else if (input()->is_double_xmm()) {
+	   __ comisd(input()->as_xmm_double_reg(),
+	   ExternalAddress((address)&double_zero));
+	   } else {
+	   LP64_ONLY(ShouldNotReachHere());
+	   __ push(rax);
+	   __ ftst();
+	   __ fnstsw_ax();
+	   __ sahf();
+	   __ pop(rax);
+	   }
+
+	   Label NaN, do_return;
+	   __ jccb(Assembler::parity, NaN);
+	   __ jccb(Assembler::below, do_return);
+
+	// input is > 0 -> return maxInt
+	// result register already contains 0x80000000, so subtracting 1 gives 0x7fffffff
+	__ decrement(result()->as_register());
+	__ jmpb(do_return);
+
+	// input is NaN -> return 0
+	__ bind(NaN);
+	__ xorptr(result()->as_register(), result()->as_register());
+
+	__ bind(do_return);
+	__ jmp(_continuation);
+	 */
+	__ bind(_entry);
+	assert(bytecode() == Bytecodes::_f2i || bytecode() == Bytecodes::_d2i, "other conversions do not require stub");
+}
+
+#ifdef TIERED
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+	__ bind(_entry);
+	ce->store_parameter(_bci, 0);
+	//__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+	__ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
+	ce->add_call_info_here(_info);
+	ce->verify_oop_map(_info);
+
+	//__ jmp(_continuation);
+	__ b(_continuation);
+	__ delayed()->nop();
+}
+#endif // TIERED
+
+
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+		bool throw_index_out_of_bounds_exception)
+	: _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
+	  , _index(index)
+{
+	_info = info == NULL ? NULL : new CodeEmitInfo(info);
+}
+
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+#ifdef OPT_RANGECHECK
+	if (_throw_pc != -1) {
+		ce->compilation()->null_check_table()->append(_throw_pc, __ offset());
+	}
+#endif
+	__ bind(_entry);
+	//// Pass the array index in eax since the runtime stub will add register state to the stack
+	// pass the array index on stack because all registers must be preserved
+
+	if (_index->is_cpu_register()) {
+		ce->store_parameter(_index->as_register(), 0);
+	} else {
+		ce->store_parameter(_index->as_jint(), 0);
+	}
+
+	if (_throw_index_out_of_bounds_exception) {
+		__ call(Runtime1::entry_for(Runtime1::throw_index_exception_id), relocInfo::runtime_call_type);
+	} else {
+		__ call(Runtime1::entry_for(Runtime1::throw_range_check_failed_id), relocInfo::runtime_call_type);
+	}
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	debug_only(__ should_not_reach_here());	
+}
+
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+	if (_offset != -1) {
+		//		ce->compilation()->null_check_table()->append(_offset, __ offset());
+		ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+	}
+	__ bind(_entry);
+	__ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	debug_only(__ should_not_reach_here());
+
+}
+
+
+// Implementation of NewInstanceStub
+
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+	_result = result;
+	_klass = klass;
+	_klass_reg = klass_reg;
+	_info = new CodeEmitInfo(info);
+	assert(stub_id == Runtime1::new_instance_id                 ||
+			stub_id == Runtime1::fast_new_instance_id            ||
+			stub_id == Runtime1::fast_new_instance_init_check_id,
+			"need new_instance id");
+	_stub_id   = stub_id;
+}
+
+// T4 is used as the klass register and V0 as the result register. MUST agree with Runtime1::generate_code_for.
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+	assert(__ sp_offset() == 0, "frame size should be fixed");
+	__ bind(_entry);
+	//__ movptr(rdx, _klass_reg->as_register());
+	//__ call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
+	assert(_klass_reg->as_register() == T4, "klass_reg must be in T4");
+	
+
+	__ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	ce->verify_oop_map(_info);
+	assert(_result->as_register() == V0, "result must be in V0");
+	__ b(_continuation);
+	__ delayed()->nop();
+}
+
+
+// Implementation of NewTypeArrayStub
+
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+	_klass_reg = klass_reg;
+	_length = length;
+	_result = result;
+	_info = new CodeEmitInfo(info);
+}
+
+// T2 is used as the length register, T4 as the klass register, and V0 as the result register.
+// MUST agree with Runtime1::generate_code_for
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+	assert(__ sp_offset() == 0, "frame size should be fixed");
+	__ bind(_entry);
+	assert(_length->as_register() == T2, "length must be in T2");
+	assert(_klass_reg->as_register() == T4, "klass_reg must be in T4");
+
+	//__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+	__ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	ce->verify_oop_map(_info);
+
+	assert(_result->as_register() == V0, "result must be in V0");
+	__ b(_continuation);
+	__ delayed()->nop();
+}
+
+
+// Implementation of NewObjectArrayStub
+
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+	_klass_reg = klass_reg;
+	_result = result;
+	_length = length;
+	_info = new CodeEmitInfo(info);
+}
+
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+	assert(__ sp_offset() == 0, "frame size should be fixed");
+	__ bind(_entry);
+	//assert(_length->as_register() == rbx, "length must in rbx,");
+	//assert(_klass_reg->as_register() == rdx, "klass_reg must in rdx");
+	//__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+	assert(_length->as_register() == T2, "length must be in T2");
+	assert(_klass_reg->as_register() == T4, "klass_reg must be in T4");
+	__ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	ce->verify_oop_map(_info);
+	//assert(_result->as_register() == rax, "result must in rax,");
+	//__ jmp(_continuation);
+	assert(_result->as_register() == V0, "result must be in V0");
+	__ b(_continuation);
+	__ delayed()->nop();
+}
+
+
+// Implementation of MonitorAccessStubs
+
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+: MonitorAccessStub(obj_reg, lock_reg)
+{
+	_info = new CodeEmitInfo(info);
+}
+
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+	assert(__ sp_offset() == 0, "frame size should be fixed");
+	__ bind(_entry);
+	ce->store_parameter(_obj_reg->as_register(),  1);
+	ce->store_parameter(_lock_reg->as_register(), 0);
+	/*
+	   Runtime1::StubID enter_id;
+	   if (ce->compilation()->has_fpu_code()) {
+	   enter_id = Runtime1::monitorenter_id;
+	   } else {
+	   enter_id = Runtime1::monitorenter_nofpu_id;
+	   }
+	   __ call(RuntimeAddress(Runtime1::entry_for(enter_id)));
+	 */
+	if (ce->compilation()->has_fpu_code()) {
+		__ call(Runtime1::entry_for(Runtime1::monitorenter_id), relocInfo::runtime_call_type);
+	} else {
+		__ call(Runtime1::entry_for(Runtime1::monitorenter_nofpu_id), relocInfo::runtime_call_type);
+	}
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	ce->verify_oop_map(_info);
+	//__ jmp(_continuation);
+	__ b(_continuation);
+	__ delayed()->nop();
+}
+
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+	__ bind(_entry);
+	if (_compute_lock) {
+		// lock_reg was destroyed by fast unlocking attempt => recompute it
+		ce->monitor_address(_monitor_ix, _lock_reg);
+	}
+	ce->store_parameter(_lock_reg->as_register(), 0);
+	// note: non-blocking leaf routine => no call info needed
+	/*
+	   Runtime1::StubID exit_id;
+	   if (ce->compilation()->has_fpu_code()) {
+	   exit_id = Runtime1::monitorexit_id;
+	   } else {
+	   exit_id = Runtime1::monitorexit_nofpu_id;
+	   }
+	   __ call(RuntimeAddress(Runtime1::entry_for(exit_id)));
+	   __ jmp(_continuation);
+	 */
+	if (ce->compilation()->has_fpu_code()) {
+		__ call(Runtime1::entry_for(Runtime1::monitorexit_id), relocInfo::runtime_call_type);
+	} else {
+		__ call(Runtime1::entry_for(Runtime1::monitorexit_nofpu_id), relocInfo::runtime_call_type);
+	}
+	__ delayed()->nop();
+
+	//__ jmp(_continuation);
+	__ b(_continuation);
+	__ delayed()->nop();
+}
+
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes)
+// - Replace original code with a call to the stub
+// At Runtime:
+// - call to stub, jump to runtime
+// - in runtime: preserve all registers (especially objects, i.e., source and destination object)
+// - in runtime: after initializing class, restore original code, reexecute instruction
+
+//int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;
+int PatchingStub::_patch_info_offset = -(NativeCall::instruction_size + 4);
+
+void PatchingStub::align_patch_site(MacroAssembler* masm) {
+	// We're patching a 5-7 byte instruction on intel and we need to
+	// make sure that we don't see a piece of the instruction.  It
+	// appears mostly impossible on Intel to simply invalidate other
+	// processors caches and since they may do aggressive prefetch it's
+	// very hard to make a guess about what code might be in the icache.
+	// Force the instruction to be double word aligned so that it
+	// doesn't span a cache line.
+
+	// the NativeJump support is not finished; it is unclear what to do here. FIXME
+	//masm->align(round_to(NativeGeneralJump::instruction_size, wordSize));
+}
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+//	assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, "not enough room for call");
+	assert(_bytes_to_copy <= 0xFF, "not enough room for call");
+
+	Label call_patch;
+
+	// static field accesses have special semantics while the class
+	// initializer is being run so we emit a test which can be used to
+	// check that this code is being executed by the initializing
+	// thread.
+	address being_initialized_entry = __ pc();
+	if (CommentedAssembly) {
+		__ block_comment(" patch template");
+	}
+	if (_id == load_klass_id) {
+		// produce a copy of the load klass instruction for use by the being initialized case
+		address start = __ pc();
+		jobject o = NULL;
+		int oop_index = __ oop_recorder()->allocate_index(o);
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(_obj, Assembler::split_high((int)o));
+		__ addiu(_obj, _obj, Assembler::split_low((int)o));
+#ifdef ASSERT
+		for (int i = 0; i < _bytes_to_copy; i++) {
+			address ptr = (address)(_pc_start + i);
+			int a_byte = (*ptr) & 0xFF;
+			assert(a_byte == *start++, "should be the same code");
+		}
+#endif
+	} else {
+
+		// make a copy of the code which is going to be patched.
+		assert((_bytes_to_copy&3)==0, "change this code");
+		for ( int i = 0; i < _bytes_to_copy; i+=4) {
+			__ a_long (*(int*)(_pc_start + i));
+			//make the site look like a nop, @jerome 
+			*(int*)(_pc_start + i)=0;
+		}
+	}
+
+	address end_of_patch = __ pc();
+	int bytes_to_skip = 0;
+	if (_id == load_klass_id) {
+		int offset = __ offset();
+		if (CommentedAssembly) {
+			__ block_comment(" being_initialized check");
+		}
+		/*   assert(_obj != noreg, "must be a valid register");
+		     Register tmp = eax;
+		     if (_obj == tmp) tmp = ebx;
+		     __ pushl(tmp);
+		     __ get_thread(tmp);
+		     __ cmpl(tmp, Address(_obj, instanceKlass::init_thread_offset_in_bytes() 
+		     + sizeof(klassOopDesc)));
+		     __ popl(tmp);
+		     __ jcc(Assembler::notEqual, call_patch);
+		 */
+		assert(_obj != NOREG, "must be a valid register");
+#ifndef OPT_THREAD
+		//FIXME, T8 need be saved ?
+		Register thread = T8;
+		__ get_thread(thread);
+#else
+		Register thread = TREG;
+#endif
+		__ lw(AT, _obj, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc));
+		__ bne(thread, AT, call_patch);
+		__ delayed()->nop();
+
+		// access_field patches may execute the patched code before it's
+		// copied back into place so we need to jump back into the main
+		// code of the nmethod to continue execution.
+		/*		address temppc = __ pc();
+				__ b(_patch_site_continuation);
+				__ delayed()->nop();
+				bytes_to_skip += (__ pc() - temppc);
+		 */ 
+		__ b(_patch_site_continuation);
+		__ delayed()->nop();
+		bytes_to_skip += __ offset() - offset;
+
+	}
+
+	if (CommentedAssembly) {
+		__ block_comment("patch data encoded as movl");
+	}
+	// Now emit the patch record telling the runtime how to find the
+	// pieces of the patch.  We only need 3 bytes but for readability of
+	// the disassembly we make the data look like a movl reg, imm32,
+	// which requires 5 bytes
+	//int sizeof_patch_record = 5;
+	// for MIPS a single 4-byte instruction word is emitted instead @jerome, 12/29/06
+	int sizeof_patch_record = 4;
+	bytes_to_skip += sizeof_patch_record;
+
+	// emit the offsets needed to find the code to patch
+	int being_initialized_entry_offset = __ pc() - being_initialized_entry + patch_info_size;
+	// patch_info_pc offset | size of b instruction(8)| patched code size
+	assert((char)being_initialized_entry_offset==being_initialized_entry_offset, "just check");
+	assert((char)bytes_to_skip==bytes_to_skip, "just check");
+	assert((char)_bytes_to_copy==_bytes_to_copy, "just check");
+	__ a_long(being_initialized_entry_offset<<8 | (bytes_to_skip<<16) | (_bytes_to_copy<<24) );
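+	// For illustration: with being_initialized_entry_offset == 24, bytes_to_skip == 12
+	// and _bytes_to_copy == 8, the word emitted above is (24 << 8) | (12 << 16) | (8 << 24)
+	// == 0x080C1800, i.e. one byte per field with the low byte left unused.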
+
+	address patch_info_pc = __ pc();
+	assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
+
+	address entry = __ pc();
+	NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
+	address target = NULL;
+	switch (_id) {
+		case access_field_id:  target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
+		case load_klass_id:    target = Runtime1::entry_for(Runtime1::load_klass_patching_id); break;
+		default: ShouldNotReachHere();
+	}
+	__ bind(call_patch);
+
+
+	if (CommentedAssembly) {
+		__ block_comment("patch entry point");
+	}
+	//__ call(RuntimeAddress(target));
+	__ lui(T9, Assembler::split_high((int)target));
+	__ addiu(T9, T9, Assembler::split_low((int)target));
+	__ jalr(T9);
+	__ delayed()->nop();
+	assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
+	ce->add_call_info_here(_info);
+	int jmp_off = __ offset();
+	__ b(_patch_site_entry);
+	__ delayed()->nop();
+	// Add enough nops so deoptimization can overwrite the jmp above with a call
+	// and not destroy the world.
+	for (int j = __ offset() ; j < jmp_off + NativeCall::instruction_size + 4 ; j+=4 ) {
+		__ nop();
+	}
+	if (_id == load_klass_id) {
+		CodeSection* cs = __ code_section();
+		RelocIterator iter(cs, (address)_pc_start, (address)(_pc_start + 1));
+		relocInfo::change_reloc_info_for_address(&iter, (address) _pc_start, relocInfo::oop_type, relocInfo::none);
+	}
+}
+
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+	ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+	__ bind(_entry);
+	//__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id)));
+	__ call(Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	debug_only(__ should_not_reach_here());
+}
+
+
+// it is not clear which register to use here; A1 is assumed here. FIXME
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+	assert(__ sp_offset() == 0, "frame size should be fixed");
+
+	__ bind(_entry);
+	// pass the object on stack because all registers must be preserved
+	if (_obj->is_cpu_register()) {
+		ce->store_parameter(_obj->as_register(), 0);
+	}
+	//__ call(RuntimeAddress(Runtime1::entry_for(_stub)));
+	__ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	debug_only(__ should_not_reach_here());
+}
+
+
+ArrayStoreExceptionStub::ArrayStoreExceptionStub(CodeEmitInfo* info):
+	_info(info) {
+	}
+
+
+void ArrayStoreExceptionStub::emit_code(LIR_Assembler* ce) {
+	assert(__ sp_offset() == 0, "frame size should be fixed");
+	__ bind(_entry);
+	//__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::throw_array_store_exception_id)));
+	__ call(Runtime1::entry_for(Runtime1::throw_array_store_exception_id), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(_info);
+	debug_only(__ should_not_reach_here());
+}
+
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+	//---------------slow case: call to native-----------------
+	__ bind(_entry);
+	// Figure out where the args should go
+	// This should really convert the IntrinsicID to the methodOop and signature
+	// but I don't know how to do that.
+	//
+	VMRegPair args[5];
+	BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT};
+	SharedRuntime::java_calling_convention(signature, args, 5, true);
+
+	// push parameters
+	// (src, src_pos, dest, destPos, length)
+	Register r[5];
+	r[0] = src()->as_register();
+	r[1] = src_pos()->as_register();
+	r[2] = dst()->as_register();
+	r[3] = dst_pos()->as_register();
+	r[4] = length()->as_register();
+
+	// next registers will get stored on the stack
+	for (int i = 0; i < 5 ; i++ ) {
+		VMReg r_1 = args[i].first();
+		if (r_1->is_stack()) {
+			int st_off = r_1->reg2stack() * wordSize;
+			//__ movptr (Address(rsp, st_off), r[i]);
+			__ sw( r[i],  SP, st_off); 
+		} else {
+			assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg ");
+		}
+	}
+
+	ce->align_call(lir_static_call);
+
+	ce->emit_static_call_stub();
+	//AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(),
+	//                       relocInfo::static_call_type);
+	//__ call(resolve);
+	__ call(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type);
+	__ delayed()->nop();
+	ce->add_call_info_here(info());
+
+#ifndef PRODUCT
+	//__ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
+	__ lui(T8, Assembler::split_high((int)&Runtime1::_arraycopy_slowcase_cnt));
+	__ lw(AT, T8, Assembler::split_low((int)&Runtime1::_arraycopy_slowcase_cnt));
+	__ addiu(AT, AT, 1);
+	__ sw(AT, T8, Assembler::split_low((int)&Runtime1::_arraycopy_slowcase_cnt));
+#endif
+
+	__ b(_continuation);
+	__ delayed()->nop();
+}
+
+/////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+	Unimplemented();
+}
+/*
+   jbyte* G1PostBarrierStub::_byte_map_base = NULL;
+
+   jbyte* G1PostBarrierStub::byte_map_base_slow() {
+   BarrierSet* bs = Universe::heap()->barrier_set();
+   assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+   "Must be if we're using this.");
+   return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+   }
+ */
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+	Unimplemented();
+}
+
+#endif // SERIALGC
+/////////////////////////////////////////////////////////////////////////////
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_Defs_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// native word offsets from memory address (little endian)
+enum {
+  pd_lo_word_offset_in_bytes = 0,
+  pd_hi_word_offset_in_bytes = BytesPerWord
+};
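+// For example, a jlong 0x1111222233334444LL stored at address A keeps 0x33334444
+// at A + pd_lo_word_offset_in_bytes (0) and 0x11112222 at
+// A + pd_hi_word_offset_in_bytes (BytesPerWord) on this little-endian layout.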
+
+// explicit rounding operations are required to implement the strictFP mode
+// This is true on i486; gs2 (MIPS) should not need it.
+// by yjl 8/15/2005
+enum {
+  pd_strict_fp_requires_explicit_rounding = false
+};
+
+
+// registers
+enum {
+  pd_nof_cpu_regs_frame_map = 32,       // number of registers used during code emission
+  // v0, v1, t0-t7, s0-s7
+  // For now the s# registers are treated as caller-saved; this could be changed
+  // later to allow locals to be cached in them.
+  // pd_nof_caller_save_cpu_regs_frame_map = 18,  // number of registers killed by calls
+  // t0-t7, s0-s7, v0, v1
+
+  pd_nof_caller_save_cpu_regs_frame_map = 18,  // number of registers killed by calls
+  pd_nof_cpu_regs_reg_alloc = 18,  // number of registers that are visible to register allocator
+  pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan
+  pd_first_cpu_reg = 0,
+  pd_last_cpu_reg = 31,
+  pd_last_allocatable_cpu_reg = 23,
+  pd_first_callee_saved_reg = 0,
+  pd_last_callee_saved_reg = 13,
+
+  pd_nof_fpu_regs_frame_map = 16,  // number of registers used during code emission
+  pd_nof_fpu_regs_reg_alloc = 16,  // number of registers that are visible to register allocator
+  pd_nof_caller_save_fpu_regs_frame_map = 16,  // number of fpu registers killed by calls
+  pd_nof_fpu_regs_linearscan = 16, // number of registers visible to linear scan
+  pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
+  pd_last_fpu_reg  = pd_nof_cpu_regs_frame_map + pd_nof_fpu_regs_frame_map - 1,
+
+  pd_nof_xmm_regs_linearscan = 0,
+  pd_nof_caller_save_xmm_regs = 0,
+  pd_first_xmm_reg = -1,
+  pd_last_xmm_reg = -1
+};
+
+
+// encoding of float value in debug info:
+enum {
+  pd_float_saved_as_double = true
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_FpuStackSim_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// No FPU stack on MIPS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_FpuStackSim_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+
+class FpuStackSim;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_FrameMap_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,426 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_c1_FrameMap_mips.cpp.incl"
+
+const int FrameMap::pd_c_runtime_reserved_arg_size = 0;
+
+
+FloatRegister FrameMap::_fpu_regs[32];
+LIR_Opr FrameMap::_a0_oop_opr;
+LIR_Opr FrameMap::_a1_oop_opr;
+LIR_Opr FrameMap::_a2_oop_opr;
+LIR_Opr FrameMap::_a3_oop_opr;
+
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {
+	LIR_Opr opr = LIR_OprFact::illegalOpr;
+	VMReg r_1 = reg->first();
+	VMReg r_2 = reg->second();
+	if (r_1->is_stack()) {
+		// Convert stack slot to an SP offset
+		// The calling convention does not count the 
+		// SharedRuntime::out_preserve_stack_slots() value
+		// so we must add it in here.
+		int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) 
+			* VMRegImpl::stack_slot_size;
+		opr = LIR_OprFact::address(new LIR_Address(_sp_opr, st_off, type));
+	} else if (r_1->is_Register()) {
+		Register reg = r_1->as_Register();
+		if (r_2->is_Register()) {
+			Register reg2 = r_2->as_Register();
+			opr = as_long_opr(reg2, reg);
+		} else if (type == T_OBJECT) {
+			opr = as_oop_opr(reg);
+		} else {
+			opr = as_opr(reg);
+		}
+	} else if (r_1->is_FloatRegister()) {
+		assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+		int num = r_1->as_FloatRegister()->encoding();
+		if (type == T_FLOAT) {
+			opr =  LIR_OprFact::single_fpu(num);
+		} else {
+			opr =  LIR_OprFact::double_fpu(num);
+		}
+	} else {
+		ShouldNotReachHere();
+	}
+	return opr;
+}
+
+// some useful constant RInfo's:
+LIR_Opr FrameMap::_zero_opr;
+LIR_Opr FrameMap::_k0_opr;
+LIR_Opr FrameMap::_k1_opr;
+LIR_Opr FrameMap::_at_opr;
+LIR_Opr FrameMap::_v0_opr;
+LIR_Opr FrameMap::_v1_opr;
+LIR_Opr FrameMap::_a0_opr;
+LIR_Opr FrameMap::_a1_opr;
+LIR_Opr FrameMap::_a2_opr;
+LIR_Opr FrameMap::_a3_opr;
+LIR_Opr FrameMap::_t0_opr;
+LIR_Opr FrameMap::_t1_opr;
+LIR_Opr FrameMap::_t2_opr;
+LIR_Opr FrameMap::_t3_opr;
+LIR_Opr FrameMap::_t4_opr;
+LIR_Opr FrameMap::_t5_opr;
+LIR_Opr FrameMap::_t6_opr;
+LIR_Opr FrameMap::_t7_opr;
+LIR_Opr FrameMap::_t8_opr;
+LIR_Opr FrameMap::_t9_opr;
+LIR_Opr FrameMap::_s0_opr;
+LIR_Opr FrameMap::_s1_opr;
+LIR_Opr FrameMap::_s2_opr;
+LIR_Opr FrameMap::_s3_opr;
+LIR_Opr FrameMap::_s4_opr;
+LIR_Opr FrameMap::_s5_opr;
+LIR_Opr FrameMap::_s6_opr;
+LIR_Opr FrameMap::_s7_opr;
+LIR_Opr FrameMap::_gp_opr;
+LIR_Opr FrameMap::_fp_opr;
+LIR_Opr FrameMap::_sp_opr;
+LIR_Opr FrameMap::_ra_opr;
+	
+LIR_Opr FrameMap::_a0_a1_opr;
+LIR_Opr FrameMap::_a2_a3_opr;
+LIR_Opr FrameMap::_v0_v1_opr;
+
+
+LIR_Opr FrameMap::_f0_opr;
+LIR_Opr FrameMap::_f12_opr;
+LIR_Opr FrameMap::_f14_opr;
+LIR_Opr FrameMap::_d0_opr;
+LIR_Opr FrameMap::_d12_opr;
+LIR_Opr FrameMap::_d14_opr;
+
+
+LIR_Opr FrameMap::receiver_opr;
+//caller saved register
+LIR_Opr FrameMap::_v0_oop_opr;
+LIR_Opr FrameMap::_v1_oop_opr;
+LIR_Opr FrameMap::_t0_oop_opr;
+LIR_Opr FrameMap::_t1_oop_opr;
+LIR_Opr FrameMap::_t2_oop_opr;
+LIR_Opr FrameMap::_t3_oop_opr;
+LIR_Opr FrameMap::_t4_oop_opr;
+LIR_Opr FrameMap::_t5_oop_opr;
+LIR_Opr FrameMap::_t6_oop_opr;
+LIR_Opr FrameMap::_t7_oop_opr;
+LIR_Opr FrameMap::_t8_oop_opr;
+LIR_Opr FrameMap::_t9_oop_opr;
+LIR_Opr FrameMap::_s0_oop_opr;
+LIR_Opr FrameMap::_s1_oop_opr;
+LIR_Opr FrameMap::_s2_oop_opr;
+LIR_Opr FrameMap::_s3_oop_opr;
+LIR_Opr FrameMap::_s4_oop_opr;
+LIR_Opr FrameMap::_s5_oop_opr;
+LIR_Opr FrameMap::_s6_oop_opr;
+LIR_Opr FrameMap::_s7_oop_opr;
+
+
+LIR_Opr FrameMap::_a0_a1_long_opr;
+LIR_Opr FrameMap::_a2_a3_long_opr;
+LIR_Opr FrameMap::_v0_v1_long_opr;
+LIR_Opr FrameMap::_f0_float_opr;
+LIR_Opr FrameMap::_f12_float_opr;
+LIR_Opr FrameMap::_f14_float_opr;
+LIR_Opr FrameMap::_d0_double_opr;
+LIR_Opr FrameMap::_d12_double_opr;
+LIR_Opr FrameMap::_d14_double_opr;
+
+
+
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+
+//--------------------------------------------------------
+//               FrameMap
+//--------------------------------------------------------
+FloatRegister FrameMap::nr2floatreg (int rnr) {
+	assert(_init_done, "tables not initialized");
+	debug_only(fpu_range_check(rnr);)
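+	// rnr*2: the allocator presumably numbers FPU registers as even/odd pairs,
+	// so allocator number n maps to the even hardware register f(2n).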
+	return _fpu_regs[rnr*2];
+}
+
+// returns true if reg could be smashed by a callee.
+bool FrameMap::is_caller_save_register (LIR_Opr reg) {
+	if (reg->is_single_fpu() || reg->is_double_fpu()) { return true; }
+	if (reg->is_double_cpu()) {
+		return is_caller_save_register(reg->as_register_lo()) ||
+			is_caller_save_register(reg->as_register_hi());
+	}
+	return is_caller_save_register(reg->as_register());
+}
+
+// FIXME: why always true? @jerome
+bool FrameMap::is_caller_save_register (Register r) {
+//	return (r>=V0 && r<=T7) || (r==T8) || (r==T9);
+	//return ((r>=V0) && (r<=T7)); 
+	return true;
+}
+
+void FrameMap::init() {
+	if (_init_done) return;
+
+	assert(nof_cpu_regs == 32, "wrong number of CPU registers");
+	//init _cpu_regs for RegAlloc
+	int i = 0;
+map_register(8,T0);_t0_opr=LIR_OprFact::single_cpu(8);_t0_oop_opr=LIR_OprFact::single_cpu_oop(8);
+map_register(9,T1);_t1_opr=LIR_OprFact::single_cpu(9);_t1_oop_opr=LIR_OprFact::single_cpu_oop(9);
+map_register(10,T2);_t2_opr=LIR_OprFact::single_cpu(10);_t2_oop_opr=LIR_OprFact::single_cpu_oop(10);
+map_register(11,T3);_t3_opr=LIR_OprFact::single_cpu(11);_t3_oop_opr=LIR_OprFact::single_cpu_oop(11);
+map_register(12,T4);_t4_opr=LIR_OprFact::single_cpu(12);_t4_oop_opr=LIR_OprFact::single_cpu_oop(12);
+map_register(13,T5);_t5_opr=LIR_OprFact::single_cpu(13);_t5_oop_opr=LIR_OprFact::single_cpu_oop(13);
+map_register(14,T6);_t6_opr=LIR_OprFact::single_cpu(14);_t6_oop_opr=LIR_OprFact::single_cpu_oop(14);
+map_register(15,T7);_t7_opr=LIR_OprFact::single_cpu(15);_t7_oop_opr=LIR_OprFact::single_cpu_oop(15);
+map_register(16,S0);_s0_opr=LIR_OprFact::single_cpu(16);_s0_oop_opr=LIR_OprFact::single_cpu_oop(16);
+map_register(17,S1);_s1_opr=LIR_OprFact::single_cpu(17);_s1_oop_opr=LIR_OprFact::single_cpu_oop(17);
+map_register(18,S2);_s2_opr=LIR_OprFact::single_cpu(18);_s2_oop_opr=LIR_OprFact::single_cpu_oop(18);
+map_register(19,S3);_s3_opr=LIR_OprFact::single_cpu(19);_s3_oop_opr=LIR_OprFact::single_cpu_oop(19);
+map_register(20,S4);_s4_opr=LIR_OprFact::single_cpu(20);_s4_oop_opr=LIR_OprFact::single_cpu_oop(20);
+map_register(21,S5);_s5_opr=LIR_OprFact::single_cpu(21);_s5_oop_opr=LIR_OprFact::single_cpu_oop(21);
+map_register(22,S6);_s6_opr=LIR_OprFact::single_cpu(22);_s6_oop_opr=LIR_OprFact::single_cpu_oop(22);
+map_register(23,S7);_s7_opr=LIR_OprFact::single_cpu(23);_s7_oop_opr=LIR_OprFact::single_cpu_oop(23);
+map_register(2,V0);_v0_opr=LIR_OprFact::single_cpu(2);_v0_oop_opr=LIR_OprFact::single_cpu_oop(2);
+map_register(3,V1);_v1_opr=LIR_OprFact::single_cpu(3);_v1_oop_opr=LIR_OprFact::single_cpu_oop(3); 
+  //------- visible to RegAlloc
+
+  map_register(4,A0);  _a0_opr=LIR_OprFact::single_cpu(4);  _a0_oop_opr=LIR_OprFact::single_cpu_oop(4);
+  map_register(5,A1);  _a1_opr=LIR_OprFact::single_cpu(5);  _a1_oop_opr=LIR_OprFact::single_cpu_oop(5);
+  map_register(6,A2);  _a2_opr=LIR_OprFact::single_cpu(6);  _a2_oop_opr=LIR_OprFact::single_cpu_oop(6);
+  map_register(7,A3);  _a3_opr=LIR_OprFact::single_cpu(7);  _a3_oop_opr=LIR_OprFact::single_cpu_oop(7);
+
+  map_register(24,T8);  _t8_opr=LIR_OprFact::single_cpu(24); 
+  map_register(25,T9);  _t9_opr=LIR_OprFact::single_cpu(25); 
+
+  map_register(0,ZERO);  _zero_opr=LIR_OprFact::single_cpu(0); 
+  map_register(1,AT);  _at_opr=LIR_OprFact::single_cpu(1); 
+  map_register(26,K0);  _k0_opr=LIR_OprFact::single_cpu(26); 
+  map_register(27,K1);  _k1_opr=LIR_OprFact::single_cpu(27); 
+  map_register(28,GP);  _gp_opr=LIR_OprFact::single_cpu(28); 
+  map_register(29,SP);  _sp_opr=LIR_OprFact::single_cpu(29); 
+  map_register(30,FP);  _fp_opr=LIR_OprFact::single_cpu(30); 
+  map_register(31,RA);  _ra_opr=LIR_OprFact::single_cpu(31); 
+/*
+  _caller_save_cpu_regs[0] =  _v0_opr;
+  _caller_save_cpu_regs[1] =  _v1_opr;
+  _caller_save_cpu_regs[2] =  _a0_opr;
+  _caller_save_cpu_regs[3] =  _a1_opr;
+  _caller_save_cpu_regs[4] =  _a2_opr;
+  _caller_save_cpu_regs[5] =  _a3_opr;
+  _caller_save_cpu_regs[6] =  _t0_opr;
+  _caller_save_cpu_regs[7] =  _t1_opr;
+  _caller_save_cpu_regs[8] =  _t2_opr;
+  _caller_save_cpu_regs[9] =  _t3_opr;
+  _caller_save_cpu_regs[10] =  _t4_opr;
+  _caller_save_cpu_regs[11] =  _t5_opr;
+  _caller_save_cpu_regs[12] =  _t6_opr;
+  _caller_save_cpu_regs[13] =  _t7_opr;
+  _caller_save_cpu_regs[14] =  _s0_opr;
+  _caller_save_cpu_regs[15] =  _s1_opr;
+  _caller_save_cpu_regs[16] =  _s2_opr;
+  _caller_save_cpu_regs[17] =  _s3_opr;
+  _caller_save_cpu_regs[18] =  _s4_opr;
+  _caller_save_cpu_regs[19] =  _s5_opr;
+  _caller_save_cpu_regs[20] =  _s6_opr;
+  _caller_save_cpu_regs[21] =  _s7_opr;
+  _caller_save_cpu_regs[22] =  _v0_opr;
+  _caller_save_cpu_regs[23] =  _v1_opr;
+ */ 
+  _caller_save_cpu_regs[0] =  _t0_opr;
+  _caller_save_cpu_regs[1] =  _t1_opr;
+  _caller_save_cpu_regs[2] =  _t2_opr;
+  _caller_save_cpu_regs[3] =  _t3_opr;
+  _caller_save_cpu_regs[4] =  _t4_opr;
+  _caller_save_cpu_regs[5] =  _t5_opr;
+  _caller_save_cpu_regs[6] =  _t6_opr;
+  _caller_save_cpu_regs[7] =  _t7_opr;
+  _caller_save_cpu_regs[8] =  _s0_opr;
+  _caller_save_cpu_regs[9] =  _s1_opr;
+  _caller_save_cpu_regs[10] =  _s2_opr;
+  _caller_save_cpu_regs[11] =  _s3_opr;
+  _caller_save_cpu_regs[12] =  _s4_opr;
+  _caller_save_cpu_regs[13] =  _s5_opr;
+  _caller_save_cpu_regs[14] =  _s6_opr;
+  _caller_save_cpu_regs[15] =  _s7_opr;
+  _caller_save_cpu_regs[16] =  _v0_opr;
+  _caller_save_cpu_regs[17] =  _v1_opr;
+
+  
+  _caller_save_fpu_regs[0] = LIR_OprFact::single_fpu(0);
+  _caller_save_fpu_regs[1] = LIR_OprFact::single_fpu(1);
+  _caller_save_fpu_regs[2] = LIR_OprFact::single_fpu(2);
+  _caller_save_fpu_regs[3] = LIR_OprFact::single_fpu(3);
+  _caller_save_fpu_regs[4] = LIR_OprFact::single_fpu(4);
+  _caller_save_fpu_regs[5] = LIR_OprFact::single_fpu(5);
+  _caller_save_fpu_regs[6] = LIR_OprFact::single_fpu(6);
+  _caller_save_fpu_regs[7] = LIR_OprFact::single_fpu(7);
+  _caller_save_fpu_regs[8] = LIR_OprFact::single_fpu(8);
+  _caller_save_fpu_regs[9] = LIR_OprFact::single_fpu(9);
+  _caller_save_fpu_regs[10] = LIR_OprFact::single_fpu(10);
+  _caller_save_fpu_regs[11] = LIR_OprFact::single_fpu(11);
+  _caller_save_fpu_regs[12] = LIR_OprFact::single_fpu(12);
+  _caller_save_fpu_regs[13] = LIR_OprFact::single_fpu(13);
+  _caller_save_fpu_regs[14] = LIR_OprFact::single_fpu(14);
+  _caller_save_fpu_regs[15] = LIR_OprFact::single_fpu(15);
+/* 
+  _caller_save_fpu_regs[16] = LIR_OprFact::single_fpu(16);
+  _caller_save_fpu_regs[17] = LIR_OprFact::single_fpu(17);
+  _caller_save_fpu_regs[18] = LIR_OprFact::single_fpu(18);
+  _caller_save_fpu_regs[19] = LIR_OprFact::single_fpu(19);
+  _caller_save_fpu_regs[20] = LIR_OprFact::single_fpu(20);
+  _caller_save_fpu_regs[21] = LIR_OprFact::single_fpu(21);
+  _caller_save_fpu_regs[22] = LIR_OprFact::single_fpu(22);
+  _caller_save_fpu_regs[23] = LIR_OprFact::single_fpu(23);
+  _caller_save_fpu_regs[24] = LIR_OprFact::single_fpu(24);
+  _caller_save_fpu_regs[25] = LIR_OprFact::single_fpu(25);
+  _caller_save_fpu_regs[26] = LIR_OprFact::single_fpu(26);
+  _caller_save_fpu_regs[27] = LIR_OprFact::single_fpu(27);
+  _caller_save_fpu_regs[28] = LIR_OprFact::single_fpu(28);
+  _caller_save_fpu_regs[29] = LIR_OprFact::single_fpu(29);
+  _caller_save_fpu_regs[30] = LIR_OprFact::single_fpu(30);
+  _caller_save_fpu_regs[31] = LIR_OprFact::single_fpu(31); 
+ */ 
+/*  
+  _caller_save_fpu_regs[0] = LIR_OprFact::single_fpu(0);
+  _caller_save_fpu_regs[1] = LIR_OprFact::single_fpu(2);
+  _caller_save_fpu_regs[2] = LIR_OprFact::single_fpu(4);
+  _caller_save_fpu_regs[3] = LIR_OprFact::single_fpu(6);
+  _caller_save_fpu_regs[4] = LIR_OprFact::single_fpu(8);
+  _caller_save_fpu_regs[5] = LIR_OprFact::single_fpu(10);
+  _caller_save_fpu_regs[6] = LIR_OprFact::single_fpu(12);
+  _caller_save_fpu_regs[7] = LIR_OprFact::single_fpu(14);
+  _caller_save_fpu_regs[8] = LIR_OprFact::single_fpu(16);
+  _caller_save_fpu_regs[9] = LIR_OprFact::single_fpu(18);
+  _caller_save_fpu_regs[10] = LIR_OprFact::single_fpu(20);
+  _caller_save_fpu_regs[11] = LIR_OprFact::single_fpu(22);
+  _caller_save_fpu_regs[12] = LIR_OprFact::single_fpu(24);
+  _caller_save_fpu_regs[13] = LIR_OprFact::single_fpu(26);
+  _caller_save_fpu_regs[14] = LIR_OprFact::single_fpu(28);
+  _caller_save_fpu_regs[15] = LIR_OprFact::single_fpu(30);
+*/
+  
+  // init _fpu_regs for RegAlloc; how many registers should be saved?
+  // @jerome
+  // for (int i = 0; i < nof_fpu_regs; i++) {
+  for (int i = 0; i < 32; i++) {
+    _fpu_regs[i] = as_FloatRegister(i);
+  }
+  // FIXME
+
+  _a0_a1_long_opr=LIR_OprFact::double_cpu(4/*a0*/,5/*a1*/);
+  _a2_a3_long_opr=LIR_OprFact::double_cpu(6/*a2*/,7/*a3*/);
+  _v0_v1_long_opr=LIR_OprFact::double_cpu(2/*v0*/,3/*v1*/);
+  _f0_float_opr  =LIR_OprFact::single_fpu(0/*f0*/);
+  _f12_float_opr =LIR_OprFact::single_fpu(12/*f12*/);
+  _f14_float_opr =LIR_OprFact::single_fpu(14/*f14*/);
+  _d0_double_opr =LIR_OprFact::double_fpu(0/*f0*/);
+  _d12_double_opr=LIR_OprFact::double_fpu(12/*f12*/);
+  _d14_double_opr=LIR_OprFact::double_fpu(14/*f14*/);
+
+	
+  _init_done = true;
+
+  VMRegPair regs;
+  BasicType sig_bt = T_OBJECT;
+  SharedRuntime::java_calling_convention(&sig_bt, &regs, 1, true);
+  
+  receiver_opr = as_oop_opr(regs.first()->as_Register());
+  assert(receiver_opr == _t0_oop_opr, "rcvr ought to be t0");
+
+}
+
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+  return Address(SP, in_bytes(sp_offset));
+}
+
+
+// ----------------mapping-----------------------
+// All mapping is FP-based, except for simple leaf methods where we access
+// the locals SP-based (and no frame is built).
+
+
+// Frame for simple leaf methods (quick entries)
+//
+//   +----------+
+//   | ret addr |   <- TOS
+//   +----------+
+//   | args     |
+//   | ......   |
+
+// Frame for standard methods
+//
+//   | .........|  <- TOS
+//   | locals   |
+//   +----------+
+//   | old fp    |  <- FP
+//   +----------+
+//   | ret addr |
+//   +----------+
+//   |  args    |
+//   | .........|
+
+
+// For OopMaps, map a local variable or spill index to a VMRegImpl name.
+// This is the offset from sp() in the frame of the slot for the index,
+// skewed by VMRegImpl::stack0 to indicate a stack location (vs. a register).
+//
+//           framesize +
+//           stack0         stack0          0  <- VMReg
+//             |              | <registers> |
+//  ...........|..............|.............|
+//      0 1 2 3 x x 4 5 6 ... |                <- local indices
+//      ^           ^        sp()                 ( x x indicate link
+//      |           |                               and return addr)
+//  arguments   non-argument locals
+
+VMReg FrameMap::fpu_regname (int n) {
+  // Return the OptoReg name for the fpu stack slot "n"
+  // A spilled fpu stack slot comprises to two single-word OptoReg's.
+  return as_FloatRegister(n)->as_VMReg();
+}
+
+LIR_Opr FrameMap::stack_pointer() {
+  //return FrameMap::esp_opr;
+  return FrameMap::_sp_opr;
+}
+
+
+bool FrameMap::validate_frame() {
+  return true;
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_FrameMap_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,177 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//  On i486/gs2 the frame looks as follows:
+//
+//  +----------------+---------+----------------------------+----------------+-----------
+//  | size_arguments | 2 words | size_locals-size_arguments | _size_monitors | spilling .
+//  +----------------+---------+----------------------------+----------------+-----------
+//
+//12/21, 06, jerome
+private:
+	
+  //static FloatRegister  _fpu_regs [nof_fpu_regs];
+  static FloatRegister  _fpu_regs [32];
+  
+  WordSize fp_offset_for_slot          (int slot) const;
+  int      local_to_slot               (int local_name, bool is_two_word) const;
+  // NOTE: names consist of argument, local, and spill names; they are not contiguous
+  WordSize fp_offset_for_name          (int name, bool is_two_word, bool for_hi_word) const;
+  WordSize fp_offset_for_monitor_lock  (int monitor_index) const;
+  WordSize fp_offset_for_monitor_object(int monitor_index) const;
+  bool     location_for_fp_offset      (WordSize word_offset_from_fp,
+                                        Location::Type loc_type,
+                                        Location* loc) const;
+  WordSize fp2sp_offset                (WordSize fp_offset) const;
+
+
+ public:
+  static const int pd_c_runtime_reserved_arg_size;
+  enum {
+    nof_reg_args                 = 5,   // registers t0, a0-a3 are available for parameter passing
+    first_available_sp_in_frame  = 0,
+    frame_pad_in_bytes           = 8
+  };
+
+  static LIR_Opr _zero_opr;
+  static LIR_Opr _at_opr;
+  static LIR_Opr _v0_opr;
+  static LIR_Opr _v1_opr;
+  static LIR_Opr _a0_opr;
+  static LIR_Opr _a1_opr;
+  static LIR_Opr _a2_opr;
+  static LIR_Opr _a3_opr;
+  static LIR_Opr _t0_opr;
+  static LIR_Opr _t1_opr;
+  static LIR_Opr _t2_opr;
+  static LIR_Opr _t3_opr;
+  static LIR_Opr _t4_opr;
+  static LIR_Opr _t5_opr;
+  static LIR_Opr _t6_opr;
+  static LIR_Opr _t7_opr;
+  static LIR_Opr _t8_opr;
+  static LIR_Opr _t9_opr;
+  static LIR_Opr _s0_opr;
+  static LIR_Opr _s1_opr;
+  static LIR_Opr _s2_opr;
+  static LIR_Opr _s3_opr;
+  static LIR_Opr _s4_opr;
+  static LIR_Opr _s5_opr;
+  static LIR_Opr _s6_opr;
+  static LIR_Opr _s7_opr;
+  static LIR_Opr _gp_opr;
+  static LIR_Opr _fp_opr;
+  static LIR_Opr _sp_opr;
+  static LIR_Opr _ra_opr;
+  static LIR_Opr _k0_opr;
+  static LIR_Opr _k1_opr;
+	
+  static LIR_Opr _f0_opr;
+  static LIR_Opr _f12_opr;
+  static LIR_Opr _f14_opr;
+  static LIR_Opr _d0_opr;
+  static LIR_Opr _d12_opr;
+  static LIR_Opr _d14_opr;
+
+  static LIR_Opr _a0_a1_opr;
+  static LIR_Opr _a2_a3_opr;
+  static LIR_Opr _v0_v1_opr;
+ 
+ 
+  static LIR_Opr receiver_opr;
+  static LIR_Opr _zero_oop_opr;
+  static LIR_Opr _at_oop_opr;
+  static LIR_Opr _v0_oop_opr;
+  static LIR_Opr _v1_oop_opr;
+  static LIR_Opr _a0_oop_opr;
+  static LIR_Opr _a1_oop_opr;
+  static LIR_Opr _a2_oop_opr;
+  static LIR_Opr _a3_oop_opr;
+  static LIR_Opr _t0_oop_opr;
+  static LIR_Opr _t1_oop_opr;
+  static LIR_Opr _t2_oop_opr;
+  static LIR_Opr _t3_oop_opr;
+  static LIR_Opr _t4_oop_opr;
+  static LIR_Opr _t5_oop_opr;
+  static LIR_Opr _t6_oop_opr;
+  static LIR_Opr _t7_oop_opr;
+  static LIR_Opr _t8_oop_opr;
+  static LIR_Opr _t9_oop_opr;
+  static LIR_Opr _s0_oop_opr;
+  static LIR_Opr _s1_oop_opr;
+  static LIR_Opr _s2_oop_opr;
+  static LIR_Opr _s3_oop_opr;
+  static LIR_Opr _s4_oop_opr;
+  static LIR_Opr _s5_oop_opr;
+  static LIR_Opr _s6_oop_opr;
+  static LIR_Opr _s7_oop_opr;
+  static LIR_Opr _gp_oop_opr;
+  static LIR_Opr _fp_oop_opr;
+  static LIR_Opr _sp_oop_opr;
+  static LIR_Opr _ra_oop_opr;
+  static LIR_Opr _k0_oop_opr;
+  static LIR_Opr _k1_oop_opr;
+	
+  static LIR_Opr _f0_oop_opr;
+  static LIR_Opr _f12_oop_opr;
+  static LIR_Opr _f14_oop_opr;
+  static LIR_Opr _d0_oop_opr;
+  static LIR_Opr _d12_oop_opr;
+  static LIR_Opr _d14_oop_opr;
+
+  static LIR_Opr _a0_a1_oop_opr;
+  static LIR_Opr _a2_a3_oop_opr;
+  static LIR_Opr _v0_v1_oop_opr;
+
+  static LIR_Opr _a0_a1_long_opr;
+  static LIR_Opr _a2_a3_long_opr;
+  static LIR_Opr _v0_v1_long_opr;
+  static LIR_Opr _f0_float_opr;
+  static LIR_Opr _f12_float_opr;
+  static LIR_Opr _f14_float_opr;
+  static LIR_Opr _d0_double_opr;
+  static LIR_Opr _d12_double_opr;
+  static LIR_Opr _d14_double_opr;
+
+static LIR_Opr as_long_opr(Register r, Register r2) {
+  return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r2));
+}
+
+static LIR_Opr as_float_opr(FloatRegister r) {
+  return LIR_OprFact::single_fpu(r->encoding());
+}
+
+
+static bool is_caller_save_register (LIR_Opr  opr);
+static bool is_caller_save_register (Register r);
+
+
+// OptoReg name for spilled virtual FPU register n
+//OptoReg::Name fpu_regname (int n);
+
+static VMReg fpu_regname (int n);
+static Register first_register();
+static FloatRegister nr2floatreg (int rnr);
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,4493 @@
+/*
+ * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_c1_LIRAssembler_mips.cpp.incl"
+
+#define __ _masm->
+
+static void select_different_registers(Register preserve,
+		Register extra,
+		Register &tmp1,
+		Register &tmp2) {
+	if (tmp1 == preserve) {
+		assert_different_registers(tmp1, tmp2, extra);
+		tmp1 = extra;
+	} else if (tmp2 == preserve) {
+		assert_different_registers(tmp1, tmp2, extra);
+		tmp2 = extra;
+	}
+	assert_different_registers(preserve, tmp1, tmp2);
+}
+
+
+
+static void select_different_registers(Register preserve,
+		Register extra,
+		Register &tmp1,
+		Register &tmp2,
+		Register &tmp3) {
+	if (tmp1 == preserve) {
+		assert_different_registers(tmp1, tmp2, tmp3, extra);
+		tmp1 = extra;
+	} else if (tmp2 == preserve) {
+		//////assert_different_registers(tmp1, tmp2, tmp3, extra);
+		tmp2 = extra;
+	} else if (tmp3 == preserve) {
+		assert_different_registers(tmp1, tmp2, tmp3, extra);
+		tmp3 = extra;
+	}
+	assert_different_registers(preserve, tmp1, tmp2, tmp3);
+}
+
+// requires adding an Assembler::is_simm16 method in assembler_gs2.hpp
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
+	if (opr->is_constant()) {
+		LIR_Const* constant = opr->as_constant_ptr();
+		switch (constant->type()) {
+			case T_INT: {
+				jint value = constant->as_jint();
+				return Assembler::is_simm16(value);
+			}
+			default:
+				return false;
+		}
+	}
+	return false;
+}
+//FIXME, which register should be used?
+LIR_Opr LIR_Assembler::receiverOpr() {
+	//return FrameMap::ecx_oop_opr;
+	return FrameMap::_t0_oop_opr;
+}
+
+LIR_Opr LIR_Assembler::incomingReceiverOpr() {
+	return receiverOpr();
+}
+
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+	//return FrameMap::ecx_opr;
+//	return FrameMap::_v1_opr;
+	return FrameMap::_t0_opr;
+}
+
+//--------------fpu register translations-----------------------
+// FIXME: it is unclear what needs to be done here for the MIPS FPU
+
+address LIR_Assembler::float_constant(float f) {
+	address const_addr = __ float_constant(f);
+	if (const_addr == NULL) {
+		bailout("const section overflow");
+		return __ code()->consts()->start();
+	} else {
+		return const_addr;
+	}
+}
+
+
+address LIR_Assembler::double_constant(double d) {
+	address const_addr = __ double_constant(d);
+	if (const_addr == NULL) {
+		bailout("const section overflow");
+		return __ code()->consts()->start();
+	} else {
+		return const_addr;
+	}
+}
+
+
+
+
+
+void LIR_Assembler::reset_FPU() {
+	Unimplemented();
+}
+
+
+void LIR_Assembler::set_24bit_FPU() {
+	Unimplemented();
+}
+
+//FIXME.
+void LIR_Assembler::fpop() {
+	// do nothing
+}
+void LIR_Assembler::fxch(int i) {
+	// do nothing
+}
+void LIR_Assembler::fld(int i) {
+	// do nothing
+}
+void LIR_Assembler::ffree(int i) {
+	// do nothing
+}
+
+void LIR_Assembler::breakpoint() {
+  __ brk(17);
+}
+// FIXME: opr cannot be a float here?
+void LIR_Assembler::push(LIR_Opr opr) {
+	if (opr->is_single_cpu()) {
+		__ push_reg(opr->as_register());
+	} else if (opr->is_double_cpu()) {
+		__ push_reg(opr->as_register_hi());
+		__ push_reg(opr->as_register_lo());
+	} else if (opr->is_stack()) {
+		__ push_addr(frame_map()->address_for_slot(opr->single_stack_ix()));
+	} else if (opr->is_constant()) {
+		LIR_Const* const_opr = opr->as_constant_ptr();
+		if (const_opr->type() == T_OBJECT) {
+			__ push_oop(const_opr->as_jobject());
+		} else if (const_opr->type() == T_INT) {
+			__ push_jint(const_opr->as_jint());
+		} else {
+			ShouldNotReachHere();
+		}
+
+	} else {
+		ShouldNotReachHere();
+	}
+}
+
+void LIR_Assembler::pop(LIR_Opr opr) {
+	if (opr->is_single_cpu() ) { 
+		// __ pop(opr->rinfo().as_register());
+		__ pop(opr->as_register());
+	} else {
+		assert(false, "Must be single word register or floating-point register");
+	}
+}
+
+
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
+	Register reg = addr->base()->as_register();
+	// now we need this for parameter passing
+	//assert(reg != SP && reg != FP, "address must be in heap, not stack");
+	return Address(reg, addr->disp());
+}
+
+
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
+	return as_Address(addr);
+}
+
+
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
+	Register reg = addr->base()->as_register();
+	return Address(reg, addr->disp()+longSize/2);
+}
+
+
+//void LIR_Assembler::osr_entry(IRScope* scope, int number_of_locks, Label* continuation, int osr_bci) { 
+void LIR_Assembler::osr_entry() { 
+	//  assert(scope->is_top_scope(), "inlined OSR not yet implemented");
+	offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
+	BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+	ValueStack* entry_state = osr_entry->state();
+	int number_of_locks = entry_state->locks_size();
+
+	// we jump here if osr happens with the interpreter
+	// state set up to continue at the beginning of the
+	// loop that triggered osr - in particular, we have
+	// the following registers setup:
+	//
+	// S7: interpreter locals pointer
+	// V1: interpreter locks pointer
+	// RA: return address
+	//T0: OSR buffer
+	// build frame
+	// ciMethod* m = scope->method();
+	ciMethod* m = compilation()->method();
+	__ build_frame(initial_frame_size_in_bytes());
+
+  // OSR buffer is
+  //
+  // locals[nlocals-1..0]
+  // monitors[0..number_of_locks]
+  //
+  // locals is a direct copy of the interpreter frame, so the first slot in the
+  // local array is the last local from the interpreter and the last slot is
+  // local[0] (the receiver) from the interpreter
+  //
+  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
+  // in the interpreter frame (the method lock if a sync method)
+
+  // Initialize monitors in the compiled activation.
+  //   T0: pointer to osr buffer
+  //
+  // All other registers are dead at this point and the locals will be
+  // copied into place by code emitted in the IR.
+
+  Register OSR_buf = osrBufferPointer()->as_register();
+
+  
+  // note: we do osr only if the expression stack at the loop beginning is empty,
+  //       in which case the spill area is empty too and we don't have to set up
+  //       spilled locals
+  //
+  // copy monitors
+  // V1: pointer to locks
+  { 
+	  assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+	  int monitor_offset = BytesPerWord * method()->max_locals()+
+		  (BasicObjectLock::size() * BytesPerWord) * (number_of_locks - 1);
+	  for (int i = 0; i < number_of_locks; i++) {
+		  int slot_offset =monitor_offset - (i * BasicObjectLock::size())*BytesPerWord;
+#ifdef ASSERT
+		  { 
+			  Label L;
+			  //__ lw(AT, V1, slot_offset * BytesPerWord + BasicObjectLock::obj_offset_in_bytes());
+			  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::obj_offset_in_bytes());
+			  __ bne(AT, ZERO, L);
+			  __ delayed()->nop();
+			  __ stop("locked object is NULL");
+			  __ bind(L);
+		  }
+#endif
+		  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::lock_offset_in_bytes());
+		  __ sw(AT, frame_map()->address_for_monitor_lock(i));
+		  __ lw(AT, OSR_buf, slot_offset + BasicObjectLock::obj_offset_in_bytes());
+		  __ sw(AT, frame_map()->address_for_monitor_object(i));
+	  }
+  }
+}
+
+
+int LIR_Assembler::check_icache() {
+	Register receiver = FrameMap::receiver_opr->as_register();
+	Register ic_klass = IC_Klass;
+
+	int offset = __ offset();
+	__ inline_cache_check(receiver, IC_Klass);
+	__ align(CodeEntryAlignment);
+	return offset;
+
+
+}
+
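+// Note on constant materialization: 32-bit immediates are built with a lui/addiu
+// pair -- lui sets the upper 16 bits and addiu adds the lower 16.
+// Assembler::split_high/split_low presumably pre-adjust the two halves so that
+// addiu's sign extension of the low half is compensated for.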
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) {
+	jobject o = NULL;
+	PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id);
+	int oop_index = __ oop_recorder()->allocate_index(o);
+	RelocationHolder rspec = oop_Relocation::spec(oop_index);
+	__ relocate(rspec);
+	__ lui(reg, Assembler::split_high((int)o));
+	__ addiu(reg, reg, Assembler::split_low((int)o));
+	// patching_epilog(patch, LIR_Op1::patch_normal, noreg, info);
+	patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+
+void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register unused, int monitor_no, Register exception) {
+
+	if (exception->is_valid()) {
+		// preserve exception
+		// note: the monitor_exit runtime call is a leaf routine
+		//       and cannot block => no GC can happen
+		// The slow case (MonitorAccessStub) uses the first two stack slots
+		// ([SP+0] and [SP+4]), therefore we store the exception at [SP+8]
+		__ sw(exception, SP, 2 * wordSize);
+	}
+
+	Register obj_reg  = obj_opr->as_register();
+	Register lock_reg = lock_opr->as_register();
+
+	// compute pointer to BasicLock
+	//Address lock_addr = frame_map()->address_for_monitor_lock_index(monitor_no);
+	Address lock_addr = frame_map()->address_for_monitor_lock(monitor_no);
+	__ lea(lock_reg, lock_addr);
+	// unlock object
+	// MonitorAccessStub* slow_case = new MonitorExitStub(lock_, true, monitor_no);
+	MonitorAccessStub* slow_case = new MonitorExitStub(lock_opr, true, monitor_no);
+	// _slow_case_stubs->append(slow_case);
+	// temporary fix: must be created after exceptionhandler, therefore as call stub
+	//_call_stubs->append(slow_case);
+	_slow_case_stubs->append(slow_case);
+	if (UseFastLocking) {
+		// try inlined fast unlocking first, revert to slow locking if it fails
+		// note: lock_reg points to the displaced header since the displaced header offset is 0!
+		assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+		__ unlock_object(NOREG, obj_reg, lock_reg, *slow_case->entry());
+	} else {
+		// always do slow unlocking
+		// note: the slow unlocking code could be inlined here, however if we use
+		//       slow unlocking, speed doesn't matter anyway and this solution is
+		//       simpler and requires less duplicated code - additionally, the
+		//       slow unlocking code is the same in either case which simplifies
+		//       debugging
+		__ b(*slow_case->entry());
+		__ delayed()->nop();
+	}
+	// done
+	__ bind(*slow_case->continuation());
+
+	if (exception->is_valid()) {
+		// restore exception
+		__ lw(exception, SP, 2 * wordSize);
+	}
+}
+
+// This specifies the sp decrement needed to build the frame
+int LIR_Assembler::initial_frame_size_in_bytes() {
+	// if rounding, must let FrameMap know!
+	return (frame_map()->framesize() - 2)  * BytesPerWord; // subtract two words to account for return address and link
+}
+
+void LIR_Assembler::emit_exception_handler() { 
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  // Lazy deopt bug 4932387. If last instruction is a call then we
+  // need an area to patch where we won't overwrite the exception
+  // handler. This means we need 5 bytes. Could use a fat_nop 
+  // but since this never gets executed it doesn't really make
+  // much difference.
+  // 
+//	for (int i = 0; i < (NativeCall::instruction_size/4 + 1) ; i++ ) {
+	for (int i = 0; i < (NativeCall::instruction_size/2+1) ; i++ ) {
+		__ nop();
+	}
+
+  // generate code for exception handler
+	//address handler_base = __ start_a_stub(1*K);//by_css
+	address handler_base = __ start_a_stub(exception_handler_size);
+	if (handler_base == NULL) {
+		// not enough space
+		bailout("exception handler overflow");
+		return;
+	}
+
+
+
+	compilation()->offsets()->set_value(CodeOffsets::Exceptions, code_offset()); 
+	// if the method does not have an exception handler, then there is
+	// no reason to search for one
+	if (compilation()->has_exception_handlers() || JvmtiExport::can_post_exceptions()) {
+		// the exception oop and pc are in V0 and V1
+		// no other registers need to be preserved, so invalidate them
+//		__ invalidate_registers(false, true, true, false, true, true);
+
+		// check that there is really an exception
+		__ verify_not_null_oop(V0);
+
+		// search an exception handler (V0: exception oop, V1: throwing pc)
+		__ call(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id), 
+				relocInfo::runtime_call_type);
+		__ delayed()->nop();
+		// if the call returns here, then the exception handler for the particular
+		// exception doesn't exist -> unwind activation and forward exception to caller
+	}
+
+	// the exception oop is in V0
+	// no other registers need to be preserved, so invalidate them
+//	__ invalidate_registers(false, true, true, true, true, true);
+
+	// check that there is really an exception
+	__ verify_not_null_oop(V0);
+
+	// unlock the receiver/klass if necessary
+	// V0: exception
+	ciMethod* method = compilation()->method();
+	if (method->is_synchronized() && GenerateSynchronizationCode) {
+		monitorexit(FrameMap::_t0_oop_opr, FrameMap::_t6_opr, NOREG, 0, V0);
+	}
+
+	// unwind activation and forward exception to caller
+	// V0: exception
+	/*if (compilation()->jvmpi_event_method_exit_enabled()) {
+		__ jmp(Runtime1::entry_for(Runtime1::jvmpi_unwind_exception_id), 
+				relocInfo::runtime_call_type);
+	} else*/ {
+		__ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), 
+				relocInfo::runtime_call_type);
+	}
+	__ delayed()->nop();
+	__ end_a_stub();
+
+}
+
+void LIR_Assembler::emit_deopt_handler() {
+	// if the last instruction is a call (typically to do a throw which
+	// is coming at the end after block reordering) the return address
+ 	// must still point into the code area in order to avoid assertion
+ 	// failures when searching for the corresponding bci => add a nop
+ 	// (was bug 5/14/1999 - gri)
+ 	
+ 	__ nop();
+	
+ 	// generate code for exception handler
+	address handler_base = __ start_a_stub(deopt_handler_size);
+	if (handler_base == NULL) {
+		// not enough space left for the handler
+		bailout("deopt handler overflow");
+		return;
+	}
+	#ifdef ASSERT
+	int offset = code_offset();
+	#endif // ASSERT
+ 
+	compilation()->offsets()->set_value(CodeOffsets::Deopt, code_offset());
+ 
+	InternalAddress here(__ pc());
+	// FIXME: may be wrong, Address_Literal
+	__ lw(AT, __ as_Address(here) );
+	__ push(AT);
+
+	// FIXME: still needs a jump here
+ 
+//	__ jr(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ 
+	assert(code_offset() - offset <= deopt_handler_size, "overflow");
+ 	__ end_a_stub();
+
+}
+
+
+// Optimized Library calls
+// This is the fast version of java.lang.String.compare; it has no
+// OSR entry and therefore we generate a slow version for OSRs
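+// It follows the String.compareTo contract: walk min(count0, count1) characters
+// and return the difference of the first mismatching pair, or the difference of
+// the two lengths when one string is a prefix of the other.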
+//void LIR_Assembler::emit_string_compare(IRScope* scope) {
+void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) {
+	// get the two String objects in T0 & T1
+	// the receiver is already in T0
+	__ lw(T1, arg1->as_register());
+
+	// Get addresses of first characters from both Strings
+	{
+		// CodeEmitInfo* info = new CodeEmitInfo(scope, 0, NULL);
+		// add_debug_info_for_null_check_here(info);
+	}
+	__ lw (T2, T0, java_lang_String::value_offset_in_bytes());	//value, T_CHAR array
+	__ lw (AT, T0, java_lang_String::offset_offset_in_bytes());	//offset
+	__ shl(AT, 1);
+	__ add(T2, T2, AT);
+	__ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_CHAR));
+	// Now T2 is the address of the first char in first string(T0)
+
+	{
+		// CodeEmitInfo* info = new CodeEmitInfo(scope, 0, NULL);
+		add_debug_info_for_null_check_here(info);
+	}
+	__ lw (T3, T1, java_lang_String::value_offset_in_bytes());
+	__ lw (AT, T1, java_lang_String::offset_offset_in_bytes());
+	__ shl(AT, 1);
+	__ add(T3, T3, AT);
+	__ addi(T3, T3, arrayOopDesc::base_offset_in_bytes(T_CHAR));
+	// Now T3 is the address of the first char in second string(T1)
+
+	// compute minimum length (in T4) and difference of lengths (V0)
+	Label L;
+	__ lw (T4, Address(T0, java_lang_String::count_offset_in_bytes())); 
+	// the length of the first string(T0)
+	__ lw (T5, Address(T1, java_lang_String::count_offset_in_bytes()));	
+	// the length of the second string(T1)
+
+	__ subu(V0, T4, T5);
+	__ blez(V0, L);
+	__ delayed()->nop();
+	__ move (T4, T5);
+	__ bind (L);
+
+	Label Loop, haveResult, LoopEnd;
+	__ bind(Loop);
+	__ beq(T4, ZERO, LoopEnd);
+	__ delayed();
+
+	__ addi(T2, T2, 2);
+
+	// compare current character
+	__ lhu(T5, T2, -2);
+	__ lhu(T6, T3, 0);
+	__ bne(T5, T6, haveResult);
+	__ delayed();
+
+	__ addi(T3, T3, 2);
+
+	__ b(Loop);
+	__ delayed()->addi(T4, T4, -1);
+
+	__ bind(haveResult);
+	__ subu(V0, T5, T6);
+
+	__ bind(LoopEnd);
+	//return_op(FrameMap::_v0RInfo, false);
+	//FIXME 
+	return_op(FrameMap::_v0_opr);
+}
+
+
+//void LIR_Assembler::return_op(RInfo result, bool result_is_oop) {
+void LIR_Assembler::return_op(LIR_Opr result) {
+	assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, "word returns are in V0");
+	// Pop the stack before the safepoint code
+	__ leave();
+	/*if (compilation()->jvmpi_event_method_exit_enabled()) {
+		//   jvmpi_method_exit(compilation()->method(), result_is_oop);
+		// __ movl(ecx, method()->encoding());
+		//__ jmp(Runtime1::entry_for(Runtime1::jvmpi_method_exit_id), 
+		//relocInfo::runtime_call_type);
+		//__ lw(T0,method()->encoding());
+		int oop_index = __ oop_recorder()->find_index(method()->encoding());
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(T0, Assembler::split_high((int)method()->encoding()));
+		__ addiu(T0, T0, Assembler::split_low((int)method()->encoding()));
+
+		__ jmp(Runtime1::entry_for(Runtime1::jvmpi_method_exit_id), 
+				relocInfo::runtime_call_type);
+		__ delayed()->nop(); 
+	}
+	else*/{
+		
+			// the poll sets the condition code, but no data registers
+		//__ relocate(relocInfo::poll_return_type);
+		// __ testl(eax, polling_page);
+		//__ ret(0);
+		__ lui(AT, Assembler::split_high((intptr_t)os::get_polling_page() 
+				+ (SafepointPollOffset % os::vm_page_size())));
+		__ relocate(relocInfo::poll_return_type);
+		__ lw(AT, AT, Assembler::split_low((intptr_t)os::get_polling_page() 
+				+ (SafepointPollOffset % os::vm_page_size())));
+		__ jr(RA);
+		__ delayed()->nop();
+	}
+ }
+
+// On godson-2e a read of protected memory into register ZERO does not raise the
+// exception, so AT is used as the destination instead. @jerome, 11/25/2006
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
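+	// The poll below normally just reads the polling page; when a safepoint is
+	// requested the VM protects that page, so the load faults and the signal
+	// handler brings this thread to a safepoint.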
+	if (info != NULL) {
+		add_debug_info_for_branch(info);
+	}else{
+		ShouldNotReachHere();
+	} 
+	int offset = __ offset();
+	__ lui(AT, Assembler::split_high((intptr_t)os::get_polling_page() 
+				+ (SafepointPollOffset % os::vm_page_size())));
+	__ relocate(relocInfo::poll_type);
+	__ lw(AT, AT, Assembler::split_low((intptr_t)os::get_polling_page() 
+				+ (SafepointPollOffset % os::vm_page_size())));
+	
+	return offset; 
+
+}
+
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
+	if (from_reg != to_reg) __ move(to_reg, from_reg);
+}
+
+
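+// Classic three-XOR swap: exchanges a and b without a temporary register.
+// Note it zeroes the register if a and b alias the same register.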
+void LIR_Assembler::swap_reg(Register a, Register b) {
+	__ xorr(a, a, b);
+	__ xorr(b, a, b);
+	__ xorr(a, a, b);
+}
+
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+	assert(src->is_constant(), "should not call otherwise");
+	assert(dest->is_register(), "should not call otherwise");
+	LIR_Const* c = src->as_constant_ptr();
+	switch (c->type()) {
+	case T_INT:
+		{
+			jint con = c->as_jint();
+			if (dest->is_single_cpu()) {
+				assert(patch_code == lir_patch_none, "no patching handled here");
+				__ move(dest->as_register(), con);
+			} else {
+				assert(dest->is_single_fpu(), "wrong register kind");
+				__ move(AT, con);
+				__ mtc1(AT, dest->as_float_reg());
+			}
+		}
+		break;
+
+	case T_LONG:
+		{
+			jlong con = c->as_jlong();
+			jint* conhi = (jint*)&con + 1;
+			jint* conlow = (jint*)&con;
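+			// On a little-endian host, &con points at the low word and &con + 1
+			// at the high word, matching the lo/hi register split below.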
+
+			if (dest->is_double_cpu()) {
+				__ move(dest->as_register_lo(), *conlow);
+				__ move(dest->as_register_hi(), *conhi);
+			} else {
+			//	assert(dest->is_double(), "wrong register kind");
+				__ move(AT, *conlow);
+				__ mtc1(AT, dest->as_double_reg());
+				__ move(AT, *conhi);
+				__ mtc1(AT, dest->as_double_reg()+1);
+			}
+		}
+		break;
+
+	case T_OBJECT:
+		{
+			if (patch_code == lir_patch_none) {
+				if (c->as_jobject() == NULL) {
+					NEEDS_CLEANUP
+					int oop_index = __ oop_recorder()->allocate_index(c->as_jobject());
+					RelocationHolder rspec = oop_Relocation::spec(oop_index);
+					__ relocate(rspec);
+					__ lui(dest->as_register(), Assembler::split_high((int) c->as_jobject()));
+					__ addiu(dest->as_register(), dest->as_register(), Assembler::split_low((int) c->as_jobject()));
+					//__ move(dest->as_register(), ZERO);
+				} else {
+					int oop_index = __ oop_recorder()->find_index(c->as_jobject());
+					RelocationHolder rspec = oop_Relocation::spec(oop_index);
+					__ relocate(rspec);
+					__ lui(dest->as_register(), Assembler::split_high((int)c->as_jobject()));
+					__ addiu(dest->as_register(), dest->as_register(), Assembler::split_low((int)c->as_jobject()));
+				}
+			} else {
+				jobject2reg_with_patching(dest->as_register(), info);
+			}
+		}
+		break;
+
+	case T_FLOAT:
+		{
+			address const_addr = float_constant(c->as_jfloat());
+			assert (const_addr != NULL, "must create float constant in the constant table");
+
+			if (dest->is_single_fpu()) {
+				__ relocate(relocInfo::internal_pc_type);
+				__ lui(AT, Assembler::split_high((int)const_addr));
+				__ addiu(AT, AT, Assembler::split_low((int)const_addr));
+				__ lwc1(dest->as_float_reg(), AT, 0);
+
+			} else {
+				assert(dest->is_single_cpu(), "Must be a cpu register.");
+				assert(dest->as_register() != AT, "AT can not be allocated.");
+
+				__ relocate(relocInfo::internal_pc_type);
+				__ lui(AT, Assembler::split_high((int)const_addr));
+				__ addiu(AT, AT, Assembler::split_low((int)const_addr));
+				__ lw(dest->as_register(), AT, 0); 
+			}
+		}
+		break;
+
+	case T_DOUBLE:
+		{
+			address const_addr = double_constant(c->as_jdouble());
+			assert (const_addr != NULL, "must create double constant in the constant table");
+			
+			if (dest->is_double_fpu()) {
+				__ relocate(relocInfo::internal_pc_type);
+				__ lui(AT, Assembler::split_high((int)const_addr));
+				__ addiu(AT, AT, Assembler::split_low((int)const_addr));
+				__ lwc1(dest->as_double_reg(), AT, 0);
+				__ lwc1(dest->as_double_reg()+1, AT, 4);					
+			} else {
+//				assert(dest->is_long(), "Must be a long register.");
+				assert(dest->as_register_lo() != AT, "AT can not be allocated.");
+				assert(dest->as_register_hi() != AT, "AT can not be allocated.");
+
+				__ relocate(relocInfo::internal_pc_type);
+				__ lui(AT, Assembler::split_high((int)const_addr));
+				__ addiu(AT, AT, Assembler::split_low((int)const_addr));
+				__ lw(dest->as_register_lo(), AT, 0);
+				__ lw(dest->as_register_hi(), AT, 4);
+			}
+		}
+		break;
+
+	default:
+		ShouldNotReachHere();
+	}
+}
+
+
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
+  	assert(src->is_constant(), "should not call otherwise");
+   	assert(dest->is_stack(), "should not call otherwise");
+     	LIR_Const* c = src->as_constant_ptr();
+  	switch (c->type()) {
+    		case T_INT:  // fall through
+    		case T_FLOAT:
+ 			__ move(AT, c->as_jint_bits());
+			__ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
+			break;
+
+    		case T_OBJECT:
+      			//__ movl(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jobject());
+      			if (c->as_jobject() == NULL) {
+				__ sw(ZERO, frame_map()->address_for_slot(dest->single_stack_ix()));
+			} else {
+				int oop_index = __ oop_recorder()->find_index(c->as_jobject());
+				RelocationHolder rspec = oop_Relocation::spec(oop_index);
+				__ relocate(rspec);
+				__ lui(AT, Assembler::split_high((int)c->as_jobject()));
+				__ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
+				__ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
+				}
+			break;
+    		case T_LONG:  // fall through
+    		case T_DOUBLE:
+      	//		__ movl(frame_map()->address_for_slot(dest->double_stack_ix(),
+	//					lo_word_offset_in_bytes), c->as_jint_lo_bits());
+      	//		__ movl(frame_map()->address_for_slot(dest->double_stack_ix(),
+         //                              hi_word_offset_in_bytes), c->as_jint_hi_bits());
+      			__ move(AT, c->as_jint_lo());
+			__ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
+							lo_word_offset_in_bytes));
+	 		__ move(AT, c->as_jint_hi());
+			__ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(),
+							hi_word_offset_in_bytes));
+			break;
+
+    		default:
+    			ShouldNotReachHere();
+  }
+
+}
+
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info ) {
+	assert(src->is_constant(), "should not call otherwise");
+	assert(dest->is_address(), "should not call otherwise");
+	LIR_Const* c = src->as_constant_ptr();
+	LIR_Address* addr = dest->as_address_ptr();
+
+	if (info != NULL) add_debug_info_for_null_check_here(info);
+	switch (type) {
+		case T_LONG: // fall through
+		case T_DOUBLE:
+			__ move(AT, c->as_jint_hi());
+			__ sw(AT, as_Address_hi(addr));
+			__ move(AT, c->as_jint_lo());
+			__ sw(AT, as_Address_lo(addr));
+			break; 
+		case T_OBJECT:  // fall through
+		case T_ARRAY:   
+			if (c->as_jobject() == NULL){
+				__ sw(ZERO, as_Address(addr));
+			} else {
+				int oop_index = __ oop_recorder()->find_index(c->as_jobject());
+				RelocationHolder rspec = oop_Relocation::spec(oop_index);
+				__ relocate(rspec);
+				__ lui(AT, Assembler::split_high((int)c->as_jobject()));
+				__ addiu(AT, AT, Assembler::split_low((int)c->as_jobject()));
+				__ sw(AT, as_Address(addr));
+			}
+			break;
+		case T_INT:     // fall through
+		case T_FLOAT:  
+			__ move(AT, c->as_jint_bits());
+			__ sw(AT, as_Address(addr));
+			break;
+		case T_BOOLEAN: // fall through
+		case T_BYTE:    
+			__ move(AT, c->as_jint());
+			__ sb(AT, as_Address(addr));
+			break;
+		case T_CHAR:    // fall through
+		case T_SHORT:   
+			__ move(AT, c->as_jint());
+			__ sh(AT, as_Address(addr));
+			break;
+		default: ShouldNotReachHere();
+	};
+}
+
+void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+  if (dest->is_float_kind() && src->is_float_kind()) {
+		if (dest->is_single_fpu()) {
+			assert(src->is_single_fpu(), "must both be float");
+			//__ mfc1(AT, src->as_float_reg());
+			//__ mtc1(AT, dest->as_float_reg());
+			 __ mov_s(dest->as_float_reg(), src->as_float_reg());
+		} else {
+			assert(src->is_double_fpu(), "must both be double");
+			//__ dmfc1(AT, src->as_double_reg());
+			//__ dmtc1(AT, dest->as_double_reg());
+			__ mov_d( dest->as_double_reg(),src->as_double_reg());
+		}
+  } else if (!dest->is_float_kind() && !src->is_float_kind()) {
+	  if (dest->is_single_cpu()) {
+		  assert(src->is_single_cpu(), "must match");
+		  move_regs(src->as_register(), dest->as_register());
+	  } else if (dest->is_double_cpu()) {
+		  //      assert(src->is_double_cpu() && !src->overlaps(dest), "must match and not overlap");
+		  assert(src->is_double_cpu(),"must match and not overlap");
+		  move_regs(src->as_register_lo(), dest->as_register_lo());
+		  move_regs(src->as_register_hi(), dest->as_register_hi());
+	  }
+  } else {
+	  // float to int or int to float moves
+	  if (dest->is_double_cpu()) {
+		  assert(src->is_double_fpu(), "must match");
+		  __ mfc1(dest->as_register_lo(), src->as_double_reg());
+		  __ mfc1(dest->as_register_hi(), src->as_double_reg() + 1);
+	  } else if (dest->is_single_cpu()) {
+		  assert(src->is_single_fpu(), "must match");
+		  __ mfc1(dest->as_register(), src->as_float_reg());
+	  } else if (dest->is_double_fpu()) {
+		  assert(src->is_double_cpu(), "must match");
+		  __ mtc1(src->as_register_lo(), dest->as_double_reg());
+		  __ mtc1(src->as_register_hi(), dest->as_double_reg() + 1);
+	  } else if (dest->is_single_fpu()) {
+		  assert(src->is_single_cpu(), "must match");
+		  __ mtc1(src->as_register(), dest->as_float_reg());
+	  }
+  }
+}
+
+
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type,bool pop_fpu_stack) {
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_stack(), "should not call otherwise");
+
+  if (src->is_single_cpu()) {
+    Address dst = frame_map()->address_for_slot(dest->single_stack_ix());
+    if (type == T_OBJECT || type == T_ARRAY) {
+      __ verify_oop(src->as_register());
+    }
+ //   __ movl (dst, src->as_register());
+     __ sw(src->as_register(),dst);  
+  } else if (src->is_double_cpu()) {
+    Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
+    Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes);
+   // __ movl (dstLO, src->as_register_lo());
+   //__ movl (dstHI, src->as_register_hi());
+     __ sw(src->as_register_lo(), dstLO);
+     __ sw(src->as_register_hi(), dstHI);
+  } else if (src->is_single_fpu()) {
+    assert(src->fpu_regnr() == 0, "argument must be on TOS");
+    Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
+   // if (pop_fpu_stack)     __ fstp_s (dst_addr);
+    //else                   __ fst_s  (dst_addr);
+     __ swc1(src->as_float_reg(), dst_addr);
+
+  } else if (src->is_double_fpu()) {
+    //assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
+    Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
+   // if (pop_fpu_stack)     __ fstp_d (dst_addr);
+  //  else                   __ fst_d  (dst_addr);
+    __ swc1(src->as_double_reg(), dst_addr);
+    __ swc1(src->as_double_reg() + 1, dst_addr.base(), dst_addr.disp() + 4);
+
+  } else {
+    ShouldNotReachHere();
+  }
+
+}
+//FIXME
+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool /*unaligned*/) {
+        LIR_Address* to_addr = dest->as_address_ptr();
+	Register dest_reg = to_addr->base()->as_register();
+	PatchingStub* patch = NULL;
+	bool needs_patching = (patch_code != lir_patch_none);
+	Register disp_reg = noreg;
+	int disp_value = to_addr->disp();
+
+	if (type == T_ARRAY || type == T_OBJECT) __ verify_oop(src->as_register());
+
+	if (needs_patching) {
+		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+		assert(!src->is_double_cpu() || 
+				patch_code == lir_patch_none || 
+				patch_code == lir_patch_normal, 
+				"patching doesn't match register");
+	}
+        
+	if (info != NULL) {
+     		add_debug_info_for_null_check_here(info);
+        }
+	if (needs_patching) {
+		disp_reg = AT;
+		__ lui(AT, Assembler::split_high(disp_value));
+		__ addiu(AT, AT, Assembler::split_low(disp_value));
+	} else if (!Assembler::is_simm16(disp_value)) { 
+		disp_reg = AT;
+		__ lui(AT, Assembler::split_high(disp_value));
+	}
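+	// a displacement that does not fit in a signed 16-bit immediate has its upper
+	// half loaded into AT; the lower half goes into the offset field of the store
+	// (the split_low(disp_value) operands below)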
+	int offset = code_offset();
+
+	switch(type) {
+	case T_DOUBLE:
+		assert(src->is_double_fpu(), "just check");
+		if (disp_reg == noreg) {
+			__ swc1(src->as_double_reg(), dest_reg, disp_value);
+			__ swc1(src->as_double_reg()+1, dest_reg, disp_value+4);
+		} else if (needs_patching) {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ swc1(src->as_double_reg(), AT, 0);
+			__ swc1(src->as_double_reg()+1, AT, 4);
+		} else {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ swc1(src->as_double_reg(), AT, Assembler::split_low(disp_value));
+			__ swc1(src->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
+		}
+		break;
+
+	case T_FLOAT:
+	//	assert(src->is_single_cpu(), "just check");
+
+		if (disp_reg == noreg) {
+			__ swc1(src->as_float_reg(), dest_reg, disp_value);
+		} else if(needs_patching) {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ swc1(src->as_float_reg(), AT, 0);
+		} else {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ swc1(src->as_float_reg(), AT, Assembler::split_low(disp_value));
+		}
+		break;
+		
+	case T_LONG: {
+		Register from_lo = src->as_register_lo();
+		Register from_hi = src->as_register_hi();
+		Register base = to_addr->base()->as_register();
+		Register index = noreg;
+		if (to_addr->index()->is_register()) {
+			index = to_addr->index()->as_register();
+		}
+		if (base == from_lo || index == from_lo) {
+			assert(base != from_hi, "can't be");
+			assert(index == noreg || (index != base && index != from_hi), "can't handle this");
+			// __ movl(as_Address_hi(to_addr), from_hi);
+			__ sw(from_hi, as_Address_hi(to_addr));
+			if (patch != NULL) {
+				patching_epilog(patch, lir_patch_high, base, info);
+				patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+				patch_code = lir_patch_low;
+			}
+			// __ movl(as_Address_lo(to_addr), from_lo);
+			__ sw(from_lo, as_Address_lo(to_addr));
+		} else {
+			assert(index == noreg || (index != base && index != from_lo), "can't handle this");
+			// __ movl(as_Address_lo(to_addr), from_lo);
+			__ sw(from_lo, as_Address_lo(to_addr));
+			if (patch != NULL) {
+				patching_epilog(patch, lir_patch_low, base, info);
+				patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+				patch_code = lir_patch_high;
+			}
+			// __ movl(as_Address_hi(to_addr), from_hi);
+			__ sw(from_hi, as_Address_hi(to_addr));
+		}
+		break;
+	}
+	case T_ADDRESS:
+	case T_ARRAY:
+	case T_OBJECT:
+	case T_INT:
+		//assert(from_reg.is_word(), "just check");
+		if (disp_reg == noreg) {
+			__ sw(src->as_register(), dest_reg, disp_value);
+		} else if (needs_patching) {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ sw(src->as_register(), AT, 0);
+		} else {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ sw(src->as_register(), AT, Assembler::split_low(disp_value));
+		}
+		break;
+
+	case T_CHAR:
+	case T_SHORT:
+//		assert(from_reg.is_word(), "just check");
+
+		if (disp_reg == noreg) {
+			__ sh(src->as_register(), dest_reg, disp_value);
+		} else if (needs_patching) {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ sh(src->as_register(), AT, 0);
+		} else {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ sh(src->as_register(), AT, Assembler::split_low(disp_value));
+		}
+		break;
+
+	case T_BYTE:
+	case T_BOOLEAN:
+		assert(src->is_single_cpu(), "just check");
+
+		if (disp_reg == noreg) {
+			__ sb(src->as_register(), dest_reg, disp_value);
+		} else if (needs_patching) {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ sb(src->as_register(), AT, 0);
+		} else {
+			__ add(AT, dest_reg, disp_reg);
+			offset = code_offset();
+			__ sb(src->as_register(), AT, Assembler::split_low(disp_value));
+		}
+		break;
+
+	default:
+		ShouldNotReachHere();
+	}
+
+  if (needs_patching) {
+    patching_epilog(patch, patch_code, to_addr->base()->as_register(), info);
+  }
+
+
+}
+
+
+
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
+	assert(src->is_stack(), "should not call otherwise");
+  	assert(dest->is_register(), "should not call otherwise");
+	if (dest->is_single_cpu()) {
+    //		__ movl(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+    		__ lw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
+    	if (type == T_ARRAY || type == T_OBJECT) {
+      		__ verify_oop(dest->as_register());
+    	}
+	} else if (dest->is_double_cpu()) {
+		Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(),lo_word_offset_in_bytes);
+		Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
+		__ lw(dest->as_register_lo(), src_addr_LO);
+		__ lw(dest->as_register_hi(), src_addr_HI);
+	} else if (dest->is_single_fpu()) {
+		Address addr = frame_map()->address_for_slot(src->single_stack_ix());
+		__ lwc1(dest->as_float_reg(), addr);
+	} else if (dest->is_double_fpu()) {
+		// low and high words of the spilled double
+		Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
+		Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
+		__ lwc1(dest->as_double_reg(), src_addr_LO);
+		__ lwc1(dest->as_double_reg() + 1, src_addr_HI);
+	} else {
+		assert(dest->is_single_cpu(), "cannot be anything else but a single cpu");
+		assert(type != T_ILLEGAL, "Bad type in stack2reg");
+		Address addr = frame_map()->address_for_slot(src->single_stack_ix());
+		__ lw(dest->as_register(), addr);
+	}
+}
+
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  if (src->is_single_stack()) {
+    // the x86 version used pushl/popl; on MIPS copy through AT
+    __ lw(AT, frame_map()->address_for_slot(src->single_stack_ix()));
+    __ sw(AT, frame_map()->address_for_slot(dest->single_stack_ix()));
+  } else if (src->is_double_stack()) {
+    __ lw(AT, frame_map()->address_for_slot(src->double_stack_ix()));
+    __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix()));
+    __ lw(AT, frame_map()->address_for_slot(src->double_stack_ix(), 4));
+    __ sw(AT, frame_map()->address_for_slot(dest->double_stack_ix(), 4));
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+
+// if patching needed, be sure the instruction at offset is a MoveMemReg
+//void LIR_Assembler::mem2reg(LIR_Address* addr, RInfo to_reg, BasicType type, LIR_Op1::LIR_PatchCode patch_code, CodeEmitInfo* info) {
+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool) {
+	assert(src->is_address(), "should not call otherwise");
+  	assert(dest->is_register(), "should not call otherwise");
+    	LIR_Address* addr = src->as_address_ptr();
+      	Address from_addr = as_Address(addr);
+	
+	Register src_reg = addr->base()->as_register();
+	Register disp_reg = noreg;
+	int disp_value = addr->disp();
+	bool needs_patching = (patch_code != lir_patch_none);
+
+	PatchingStub* patch = NULL;
+	if (needs_patching) {
+		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+//		assert(!to_reg.is_long() || patch_code == LIR_Op1::patch_low || patch_code == LIR_Op1::patch_high, "patching doesn't match register");
+	}
+
+	// when patching is needed we must emit the full lui/addiu pair so the patch site has a fixed instruction pattern
+	if (needs_patching) {
+		disp_reg = AT;
+		__ lui(AT, Assembler::split_high(disp_value));
+		__ addiu(AT, AT, Assembler::split_low(disp_value));
+	} else if (!Assembler::is_simm16(disp_value)) {
+		disp_reg = AT;
+		__ lui(AT, Assembler::split_high(disp_value));
+	}
+
+	// remember the offset of the load.  The patching_epilog must be done
+	// before the call to add_debug_info, otherwise the PcDescs don't get
+	// entered in increasing order.
+	int offset = code_offset();
+
+	switch(type) {
+	case T_BOOLEAN:
+	case T_BYTE:
+		{
+			//assert(to_reg.is_word(), "just check");
+			if (disp_reg == noreg) {
+				__ lb(dest->as_register(), src_reg, disp_value);
+			} else if (needs_patching) {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lb(dest->as_register(), AT, 0);
+			} else {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lb(dest->as_register(), AT, Assembler::split_low(disp_value));
+			}
+		}
+		break;
+
+	case T_CHAR:
+		{
+			//assert(to_reg.is_word(), "just check");
+			
+			if (disp_reg == noreg) {
+				__ lhu(dest->as_register(), src_reg, disp_value);
+			} else if (needs_patching) {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lhu(dest->as_register(), AT, 0);
+			} else {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lhu(dest->as_register(), AT, Assembler::split_low(disp_value));
+			}
+		}
+		break;
+
+	case T_SHORT:
+		{
+		//	assert(to_reg.is_word(), "just check");
+			
+			if (disp_reg == noreg) {
+				__ lh(dest->as_register(), src_reg, disp_value);
+			} else if (needs_patching) {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lh(dest->as_register(), AT, 0);
+			} else {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lh(dest->as_register(), AT, Assembler::split_low(disp_value));
+			}
+		}
+		break;
+
+	case T_INT:
+	case T_OBJECT:
+	case T_ARRAY:
+		{
+			//assert(to_reg.is_word(), "just check");
+			
+			if (disp_reg == noreg) {
+				__ lw(dest->as_register(), src_reg, disp_value);
+			} else if (needs_patching) {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lw(dest->as_register(), AT, 0);
+			} else {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lw(dest->as_register(), AT, Assembler::split_low(disp_value));
+			}
+		}
+		break;
+
+	case T_LONG:
+		{	
+		Register to_lo = dest->as_register_lo();
+     		Register to_hi = dest->as_register_hi();
+      		Register base = addr->base()->as_register();
+      		Register index = noreg;
+      		if (addr->index()->is_register()) {
+        		index = addr->index()->as_register();
+      		}
+      		if ((base == to_lo && index == to_hi) ||(base == to_hi && index == to_lo)) {
+        	// addresses with 2 registers are only formed as a result of
+       		 // array access so this code will never have to deal with
+        	// patches or null checks.
+        	assert(info == NULL && patch == NULL, "must be");
+        //	__ leal(to_hi, as_Address(addr));
+        	__ lea(to_hi, as_Address(addr));
+        	//__ movl(to_lo, Address(to_hi));
+        	__ lw(to_lo, Address(to_hi));
+        	//__ movl(to_hi, Address(to_hi, BytesPerWord));
+        	__ lw(to_hi, Address(to_hi, BytesPerWord));
+      		} else if (base == to_lo || index == to_lo) {
+        	assert(base != to_hi, "can't be");
+        	assert(index == noreg || (index != base && index != to_hi), "can't handle this");
+        	//__ movl(to_hi, as_Address_hi(addr));
+        	__ lw(to_hi, as_Address_hi(addr));
+        	if (patch != NULL) {
+          		patching_epilog(patch, lir_patch_high, base, info);
+          		patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+          		patch_code = lir_patch_low;
+        	}
+        	//__ movl(to_lo, as_Address_lo(addr));
+        	__ lw(to_lo, as_Address_lo(addr));
+      		} else {
+        	assert(index == noreg || (index != base && index != to_lo), "can't handle this");
+        	//__ movl(to_lo, as_Address_lo(addr));
+        	__ lw(to_lo, as_Address_lo(addr));
+        	if (patch != NULL) {
+          	patching_epilog(patch, lir_patch_low, base, info);
+          	patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+          	patch_code = lir_patch_high;
+        	}
+        	//__ movl(to_hi, as_Address_hi(addr));
+        	__ lw(to_hi, as_Address_hi(addr));
+      		}
+      		break;
+    		}
+	case T_FLOAT:
+		{
+			//assert(to_reg.is_float(), "just check");
+			if (disp_reg == noreg) {
+				__ lwc1(dest->as_float_reg(), src_reg, disp_value);
+			} else if (needs_patching) {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lwc1(dest->as_float_reg(), AT, 0);
+			} else {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lwc1(dest->as_float_reg(), AT, Assembler::split_low(disp_value));
+			}
+		}
+		break;
+
+	case T_DOUBLE:
+		{
+			//assert(to_reg.is_double(), "just check");
+
+			if (disp_reg == noreg) {
+				__ lwc1(dest->as_double_reg(), src_reg, disp_value);
+				__ lwc1(dest->as_double_reg()+1, src_reg, disp_value+4);
+			} else if (needs_patching) {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lwc1(dest->as_double_reg(), AT, 0);
+				__ lwc1(dest->as_double_reg()+1, AT, 4);
+			} else {
+				__ add(AT, src_reg, disp_reg);
+				offset = code_offset();
+				__ lwc1(dest->as_double_reg(), AT, Assembler::split_low(disp_value));
+				__ lwc1(dest->as_double_reg()+1, AT, Assembler::split_low(disp_value) + 4);
+			}
+		}
+		break;
+		
+	default:
+		ShouldNotReachHere();
+	}
+
+	if (needs_patching) {
+		patching_epilog(patch, patch_code, src_reg, info);
+	}
+
+	if (info != NULL) add_debug_info_for_null_check(offset, info);
+}
+
+
+void LIR_Assembler::prefetchr(LIR_Opr src) {
+  LIR_Address* addr = src->as_address_ptr();
+  Address from_addr = as_Address(addr);
+/*
+  if (VM_Version::supports_sse2()) {
+    __ prefetchnta(from_addr);
+  } else if (VM_Version::supports_sse()) {
+    __ prefetcht2(from_addr);
+  }
+*/
+}
+
+
+void LIR_Assembler::prefetchw(LIR_Opr src) {
+ /*
+  * if (!VM_Version::supports_prefetchw()) {
+    prefetchr(src);
+    return;
+  }
+
+  LIR_Address* addr = src->as_address_ptr();
+  Address from_addr = as_Address(addr);
+
+  __ prefetchw(from_addr);
+*/
+}
+
+NEEDS_CLEANUP; // This could be static?
+Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const {
+  int elem_size = type2aelembytes(type);
+  switch (elem_size) {
+    case 1: return Address::times_1;
+    case 2: return Address::times_2;
+    case 4: return Address::times_4;
+    case 8: return Address::times_8;
+  }
+  ShouldNotReachHere();
+  return Address::no_scale;
+}
+
+
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  switch (op->code()) {
+    case lir_idiv:
+    case lir_irem:
+      arithmetic_idiv(
+        op->code(),
+        op->in_opr1(),
+        op->in_opr2(),
+        op->in_opr3(),
+        op->result_opr(),
+        op->info());
+      break;
+    default:      ShouldNotReachHere(); break;
+  }
+}
+
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+	LIR_Opr opr1 = op->left();
+	LIR_Opr opr2 = op->right();
+	LIR_Condition condition = op->cond();
+#ifdef ASSERT
+	assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+	if (op->block() != NULL)  _branch_target_blocks.append(op->block());
+	if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
+#endif	
+	if (op->cond() == lir_cond_always) {
+		__ b(*op->label());
+		__ delayed()->nop();
+		return;
+	}
+  if (opr1->is_single_cpu()) {
+
+		Register reg_op1 = opr1->as_register();
+		if (opr2->is_single_cpu()) {
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+			Register reg_op2 = opr2->as_register();
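+			// MIPS has no condition codes: eq/ne branch on the two registers directly,
+			// the relational conditions compute a 0/1 result into AT with slt/sltu and
+			// then branch on AT against ZERO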
+			switch (condition) {
+			case lir_cond_equal:
+				__ beq(reg_op1, reg_op2, *op->label());
+				break;
+			case lir_cond_notEqual:
+				__ bne(reg_op1, reg_op2, *op->label());
+				break;
+			case lir_cond_less:
+				// AT = 1 TRUE
+				__ slt(AT, reg_op1, reg_op2);
+				__ bne(AT, ZERO, *op->label());
+				break;
+			case lir_cond_lessEqual:
+				// AT = 0 TRUE
+				__ slt(AT, reg_op2, reg_op1);
+				__ beq(AT, ZERO, *op->label());
+				break;
+			case lir_cond_belowEqual:
+				// AT = 0 TRUE
+				__ sltu(AT, reg_op2, reg_op1);
+				__ beq(AT, ZERO, *op->label());
+				break;
+			case lir_cond_greaterEqual:
+				// AT = 0 TRUE
+				__ slt(AT, reg_op1, reg_op2);
+				__ beq(AT, ZERO, *op->label());
+				break;
+			case lir_cond_aboveEqual:
+				// AT = 0 TRUE
+				__ sltu(AT, reg_op1, reg_op2);
+				__ beq(AT, ZERO, *op->label());
+				break;
+			case lir_cond_greater:
+				// AT = 1 TRUE
+				__ slt(AT, reg_op2, reg_op1);
+				__ bne(AT, ZERO, *op->label());
+				break;				
+			default: ShouldNotReachHere();
+			}
+		} else if (opr2->is_constant()) {
+			jint temp_value;
+			bool is_object = false;
+			if (opr2->pointer()->as_constant()->type() == T_INT) {
+				temp_value = (jint)(opr2->as_jint());
+			} else if (opr2->pointer()->as_constant()->type() == T_OBJECT) {
+				is_object = true;
+				temp_value = (jint)(opr2->as_jobject());
+			} else {
+				ShouldNotReachHere();
+			}
+
+			switch (condition) {
+			case lir_cond_equal:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+			if (temp_value) {
+				if (is_object) {
+					int oop_index = __ oop_recorder()->allocate_index((jobject)temp_value);
+					RelocationHolder rspec = oop_Relocation::spec(oop_index);
+					__ relocate(rspec);
+				}
+				__ move(AT, temp_value);
+				__ beq(reg_op1, AT, *op->label());
+			} else {
+				__ beq(reg_op1, ZERO, *op->label());
+			}
+			break;
+				
+			case lir_cond_notEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+			if (temp_value) {
+				if (is_object) {
+					int oop_index = __ oop_recorder()->allocate_index((jobject)temp_value);
+					RelocationHolder rspec = oop_Relocation::spec(oop_index);
+					__ relocate(rspec);
+				}
+				__ move(AT, temp_value);
+				__ bne(reg_op1, AT, *op->label());
+			} else {
+				__ bne(reg_op1, ZERO, *op->label());
+			}
+			break;
+				
+			case lir_cond_less:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+				// AT = 1 TRUE
+				if (Assembler::is_simm16(temp_value)) {
+					__ slti(AT, reg_op1, temp_value);
+				} else {
+					__ move(AT, temp_value);
+					__ slt(AT, reg_op1, AT);
+				}
+				__ bne(AT, ZERO, *op->label());
+				break;
+				
+			case lir_cond_lessEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+				// AT = 0 TRUE
+				__ move(AT, temp_value);
+				__ slt(AT, AT, reg_op1);
+				__ beq(AT, ZERO, *op->label());
+				break;
+				
+			case lir_cond_belowEqual:
+				// AT = 0 TRUE
+#ifdef OPT_RANGECHECK
+				if (op->check()) {
+					__ move(AT, temp_value);
+					add_debug_info_for_range_check_here(op->info(), temp_value);
+					__ tgeu(AT, reg_op1, 29);
+				} else {
+#endif
+					__ move(AT, temp_value);
+					__ sltu(AT, AT, reg_op1);
+					__ beq(AT, ZERO, *op->label());
+#ifdef OPT_RANGECHECK
+				}
+#endif
+				break;
+				
+			case lir_cond_greaterEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+				// AT = 0 TRUE
+				if (Assembler::is_simm16(temp_value)) {
+					__ slti(AT, reg_op1, temp_value);
+				} else {
+					__ move(AT, temp_value);
+					__ slt(AT, reg_op1, AT);
+				}
+				__ beq(AT, ZERO, *op->label());
+				break;
+				
+			case lir_cond_aboveEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+				// AT = 0 TRUE
+				if (Assembler::is_simm16(temp_value)) {
+					__ sltiu(AT, reg_op1, temp_value);
+				} else {
+					__ move(AT, temp_value);
+					__ sltu(AT, reg_op1, AT);
+				}
+				__ beq(AT, ZERO, *op->label());
+				break;
+				
+			case lir_cond_greater:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+				// AT = 1 TRUE
+				__ move(AT, temp_value);
+				__ slt(AT, AT, reg_op1);
+				__ bne(AT, ZERO, *op->label());
+				break;				
+
+			default: ShouldNotReachHere();
+			}
+
+		} else {
+			if (opr2->is_address()) {
+				__ lw(AT, as_Address(opr2->pointer()->as_address()));
+			} else if (opr2->is_stack()) {
+				__ lw(AT, frame_map()->address_for_slot(opr2->single_stack_ix()));
+			} else {
+				ShouldNotReachHere();
+			}
+			switch (condition) {
+				case lir_cond_equal:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+					__ beq(reg_op1, AT, *op->label());
+					break;
+				case lir_cond_notEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+					__ bne(reg_op1, AT, *op->label());
+					break;
+				case lir_cond_less:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+					// AT = 1 TRUE
+					__ slt(AT, reg_op1, AT);
+					__ bne(AT, ZERO, *op->label());
+					break;
+				case lir_cond_lessEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+					// AT = 0 TRUE
+					__ slt(AT, AT, reg_op1);
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_belowEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+					// AT = 0 TRUE
+					__ sltu(AT, AT, reg_op1);
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_greaterEqual:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+					// AT = 0 TRUE
+					__ slt(AT, reg_op1, AT);
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_aboveEqual:
+					// AT = 0 TRUE
+#ifdef OPT_RANGECHECK
+					if (op->check()) {
+						add_debug_info_for_range_check_here(op->info(), opr1->rinfo());
+						__ tgeu(reg_op1, AT, 29);
+					} else {
+#endif
+						__ sltu(AT, reg_op1, AT);
+						__ beq(AT, ZERO, *op->label());
+#ifdef OPT_RANGECHECK
+					}
+#endif
+					break;
+				case lir_cond_greater:
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+					// AT = 1 TRUE
+					__ slt(AT, AT, reg_op1);
+					__ bne(AT, ZERO, *op->label());
+					break;				
+				default: ShouldNotReachHere();
+			}
+		}
+#ifdef OPT_RANGECHECK
+		if (!op->check())
+#endif
+		__ delayed()->nop();
+
+  } else if(opr1->is_address() || opr1->is_stack()) {
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+    if (opr2->is_constant()) {
+			jint temp_value;
+      if (opr2->as_constant_ptr()->type() == T_INT) {
+        temp_value = (jint)opr2->as_constant_ptr()->as_jint();
+      } else if (opr2->as_constant_ptr()->type() == T_OBJECT) {
+        temp_value = (jint)opr2->as_constant_ptr()->as_jobject();
+      } else {
+				ShouldNotReachHere();
+			}
+
+			if (Assembler::is_simm16(temp_value)) {
+				if (opr1->is_address()) {
+					__ lw(AT, as_Address(opr1->pointer()->as_address()));
+				} else {
+					__ lw(AT, frame_map()->address_for_slot(opr1->single_stack_ix()));
+				}
+
+				switch(condition) {
+				
+				case lir_cond_equal:
+					__ addi(AT, AT, -(int)temp_value);	
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_notEqual:
+					__ addi(AT, AT, -(int)temp_value);	
+					__ bne(AT, ZERO, *op->label());
+					break;
+				case lir_cond_less:
+					// AT = 1 TRUE
+					__ slti(AT, AT, temp_value);
+					__ bne(AT, ZERO, *op->label());
+					break;
+				case lir_cond_lessEqual:
+					// AT = 0 TRUE
+					__ addi(AT, AT, -temp_value);	
+					__ slt(AT, ZERO, AT);
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_belowEqual:
+					// AT = 0 TRUE
+					__ addiu(AT, AT, -temp_value);	
+					__ sltu(AT, ZERO, AT);
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_greaterEqual:
+					// AT = 0 TRUE
+					__ slti(AT, AT, temp_value);
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_aboveEqual:
+					// AT = 0 TRUE
+					__ sltiu(AT, AT, temp_value);
+					__ beq(AT, ZERO, *op->label());
+					break;
+				case lir_cond_greater:
+					// AT = 1 TRUE
+					__ addi(AT, AT, -temp_value);		
+					__ slt(AT, ZERO, AT);
+					__ bne(AT, ZERO, *op->label());
+					break;				
+	
+				
+				default:
+				//jerome_for_debug	
+					Unimplemented();
+				}
+			} else {
+        Unimplemented();
+      }
+    } else {
+      Unimplemented();
+    }
+
+		__ delayed()->nop();
+
+  } else if(opr1->is_double_cpu()) {
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+		Register opr1_lo = opr1->as_register_lo();
+		Register opr1_hi = opr1->as_register_hi();
+
+		if (opr2->is_double_cpu()) {
+			Register opr2_lo = opr2->as_register_lo();
+			Register opr2_hi = opr2->as_register_hi();
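+			// 64-bit compare on 32-bit registers: decide on the high words first
+			// (signed slt for the signed conditions, sltu for below/above); only when
+			// the high words are equal do the low words decide, compared as unsigned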
+			switch (condition) {
+			case lir_cond_equal: 
+				{
+					Label L;
+					__ bne(opr1_lo, opr2_lo, L);
+					__ delayed()->nop();
+					__ beq(opr1_hi, opr2_hi, *op->label());
+					__ delayed()->nop();
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_notEqual:
+				__ bne(opr1_lo, opr2_lo, *op->label());
+				__ delayed()->nop();
+				__ bne(opr1_hi, opr2_hi, *op->label());
+				__ delayed()->nop();
+				break;
+
+			case lir_cond_less:
+				{ 
+					Label L;
+					
+					// if hi less then jump
+					__ slt(AT, opr1_hi, opr2_hi);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+					
+					// if hi great then fail
+					__ bne(opr1_hi, opr2_hi, L);
+					__ delayed();
+				
+					// now just comp lo as unsigned
+					__ sltu(AT, opr1_lo, opr2_lo);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_lessEqual:
+				{
+					Label L;
+					
+					// if hi great then fail
+					__ slt(AT, opr2_hi, opr1_hi);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi less then jump
+					__ bne(opr2_hi, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ sltu(AT, opr2_lo, opr1_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_belowEqual:
+				{
+					Label L;
+					
+					// if hi great then fail
+					__ sltu(AT, opr2_hi, opr1_hi);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi less then jump
+					__ bne(opr2_hi, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ sltu(AT, opr2_lo, opr1_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_greaterEqual:
+				{
+					Label L;
+					
+					// if hi less then fail
+					__ slt(AT, opr1_hi, opr2_hi);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi great then jump
+					__ bne(opr2_hi, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ sltu(AT, opr1_lo, opr2_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_aboveEqual:
+				{
+					Label L;
+					
+					// if hi less then fail
+					__ sltu(AT, opr1_hi, opr2_hi);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi great then jump
+					__ bne(opr2_hi, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ sltu(AT, opr1_lo, opr2_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_greater:
+				{
+					Label L;
+					
+					// if hi great then jump
+					__ slt(AT, opr2_hi, opr1_hi);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+					
+					// if hi less then fail
+					__ bne(opr2_hi, opr1_hi, L);
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ sltu(AT, opr2_lo, opr1_lo);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;				
+				
+			default: ShouldNotReachHere();
+			}
+			
+		} else if(opr2->is_constant()) {
+			jlong lv = opr2->as_jlong();
+			jint iv_lo = (jint)lv;
+			jint iv_hi = (jint)(lv>>32);
+			bool is_zero = (lv==0);
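+			// comparing against a long constant: zero is special-cased (sign tests on
+			// the high word, or-ing the halves for eq/ne); otherwise the constant
+			// halves are materialized into T8 and compared as in the register case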
+
+			switch (condition) {
+			case lir_cond_equal: 
+				if (is_zero) {
+					__ orr(AT, opr1_lo, opr1_hi);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+				} else {
+					Label L;
+					__ move(T8, iv_lo);
+					__ bne(opr1_lo, T8, L);
+					__ delayed();
+					__ move(T8, iv_hi);
+					__ beq(opr1_hi, T8, *op->label());
+					__ delayed()->nop();
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_notEqual:
+				if (is_zero) {
+					__ orr(AT, opr1_lo, opr1_hi);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+				} else {
+					__ move(T8, iv_lo);
+					__ bne(opr1_lo, T8, *op->label());
+					__ delayed();
+					__ move(T8, iv_hi);
+					__ bne(opr1_hi, T8, *op->label());
+					__ delayed()->nop();
+				}
+				break;
+
+			case lir_cond_less:
+				if (is_zero) {
+					__ bltz(opr1_hi, *op->label());
+					__ delayed()->nop();
+				} else { 
+					Label L;
+					
+					// if hi less then jump
+					__ move(T8, iv_hi);
+					__ slt(AT, opr1_hi, T8);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+					
+					// if hi great then fail
+					__ bne(opr1_hi, T8, L);
+					__ delayed();
+				
+					// now just comp lo as unsigned
+					if (Assembler::is_simm16(iv_lo)) {
+						__ sltiu(AT, opr1_lo, iv_lo);
+					} else {
+						__ move(T8, iv_lo);
+						__ sltu(AT, opr1_lo, T8);
+					}
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_lessEqual:
+				if (is_zero) {
+					__ bltz(opr1_hi, *op->label());
+					__ delayed()->nop();
+					__ orr(AT, opr1_hi, opr1_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed();
+				} else {
+					Label L;
+					
+					// if hi great then fail
+					__ move(T8, iv_hi);
+					__ slt(AT, T8, opr1_hi);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi less then jump
+					__ bne(T8, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ move(T8, iv_lo);
+					__ sltu(AT, T8, opr1_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_belowEqual:
+				if (is_zero) {
+					__ orr(AT, opr1_hi, opr1_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+				} else {
+					Label L;
+					
+					// if hi great then fail
+					__ move(T8, iv_hi);
+					__ sltu(AT, T8, opr1_hi);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi less then jump
+					__ bne(T8, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ move(T8, iv_lo);
+					__ sltu(AT, T8, opr1_lo);
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_greaterEqual:
+				if (is_zero) {
+					__ bgez(opr1_hi, *op->label());
+					__ delayed()->nop();
+				} else {
+					Label L;
+					
+					// if hi less then fail
+					__ move(T8, iv_hi);
+					__ slt(AT, opr1_hi, T8);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi great then jump
+					__ bne(T8, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					if (Assembler::is_simm16(iv_lo)) {
+						__ sltiu(AT, opr1_lo, iv_lo);
+					} else {
+						__ move(T8, iv_lo);
+						__ sltu(AT, opr1_lo, T8);
+					}
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_aboveEqual:
+				if (is_zero) {
+					__ b(*op->label());
+					__ delayed()->nop();
+				} else {
+					Label L;
+					
+					// if hi less then fail
+					__ move(T8, iv_hi);
+					__ sltu(AT, opr1_hi, T8);
+					__ bne(AT, ZERO, L);
+					__ delayed()->nop();
+					
+					// if hi great then jump
+					__ bne(T8, opr1_hi, *op->label());
+					__ delayed();
+
+					// now just comp lo as unsigned
+					if (Assembler::is_simm16(iv_lo)) {
+						__ sltiu(AT, opr1_lo, iv_lo);
+					} else {
+						__ move(T8, iv_lo);
+						__ sltu(AT, opr1_lo, T8);
+					}
+					__ beq(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;
+
+			case lir_cond_greater:
+				if (is_zero) {
+					Label L;
+					__ bgtz(opr1_hi, *op->label());
+					__ delayed()->nop();
+					__ bne(opr1_hi, ZERO, L);
+					__ delayed()->nop();
+					__ bne(opr1_lo, ZERO, *op->label());
+					__ delayed()->nop();
+					__ bind(L);
+				} else {
+					Label L;
+					
+					// if hi great then jump
+					__ move(T8, iv_hi);
+					__ slt(AT, T8, opr1_hi);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+					
+					// if hi less then fail
+					__ bne(T8, opr1_hi, L);
+					__ delayed();
+
+					// now just comp lo as unsigned
+					__ move(T8, iv_lo);
+					__ sltu(AT, T8, opr1_lo);
+					__ bne(AT, ZERO, *op->label());
+					__ delayed()->nop();
+
+					__ bind(L);
+				}
+				break;				
+				
+			default: 
+				ShouldNotReachHere();
+			}
+		} else {
+			Unimplemented();
+		}
+  } else if (opr1->is_single_fpu()) {
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+		assert(opr2->is_single_fpu(), "change the code");
+		
+		FloatRegister reg_op1 = opr1->as_float_reg();
+		FloatRegister reg_op2 = opr2->as_float_reg();
+		bool un_jump = (op->ublock()->label() == op->label());
+
+		Label& L = *op->label();
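+		// when the unordered (NaN) successor is the same block as the taken branch,
+		// use the unordered compares (c_ueq/c_ult/c_ule) so NaN operands also take
+		// the branch; otherwise use the ordered variants and let NaN fall through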
+		
+		switch (condition) {
+		case lir_cond_equal:
+			if (un_jump)
+				__ c_ueq_s(reg_op1, reg_op2);
+			else 
+				__ c_eq_s(reg_op1, reg_op2);
+			__ bc1t(L);
+
+			break;
+
+		case lir_cond_notEqual:
+			if (un_jump)
+				__ c_eq_s(reg_op1, reg_op2);
+			else 
+				__ c_ueq_s(reg_op1, reg_op2);
+			__ bc1f(L);
+
+			break;
+
+		case lir_cond_less:
+			if (un_jump)
+				__ c_ult_s(reg_op1, reg_op2);
+			else
+				__ c_olt_s(reg_op1, reg_op2);
+			__ bc1t(L);
+
+			break;
+
+		case lir_cond_lessEqual:
+		case lir_cond_belowEqual:
+			if (un_jump)
+				__ c_ule_s(reg_op1, reg_op2);
+			else
+				__ c_ole_s(reg_op1, reg_op2);
+			__ bc1t(L);
+
+			break;
+			
+		case lir_cond_greaterEqual:
+		case lir_cond_aboveEqual:
+			if (un_jump)
+				__ c_olt_s(reg_op1, reg_op2);
+			else 
+				__ c_ult_s(reg_op1, reg_op2);
+			__ bc1f(L);
+			
+			break;
+			
+		case lir_cond_greater:
+			if (un_jump)
+				__ c_ole_s(reg_op1, reg_op2);
+			else 
+				__ c_ule_s(reg_op1, reg_op2);
+			__ bc1f(L);
+			
+			break;				
+			
+		default: 
+			ShouldNotReachHere();
+		}
+		__ delayed()->nop();
+  } else if (opr1->is_double_fpu()) {
+#ifdef OPT_RANGECHECK
+			assert(!op->check(), "just check");
+#endif
+		assert(opr2->is_double_fpu(), "change the code");
+	
+		FloatRegister reg_op1 = opr1->as_double_reg();
+		FloatRegister reg_op2 = opr2->as_double_reg();
+		bool un_jump = (op->ublock()->label()==op->label());
+		Label& L = *op->label();
+		
+		switch (condition) {
+		case lir_cond_equal:
+			if (un_jump)
+				__ c_ueq_d(reg_op1, reg_op2);
+			else 
+				__ c_eq_d(reg_op1, reg_op2);
+			__ bc1t(L);
+
+			break;
+
+		case lir_cond_notEqual:
+			if (un_jump)
+				__ c_eq_d(reg_op1, reg_op2);
+			else 
+				__ c_ueq_d(reg_op1, reg_op2);
+			__ bc1f(L);
+
+			break;
+
+		case lir_cond_less:
+			if (un_jump)
+				__ c_ult_d(reg_op1, reg_op2);
+			else
+				__ c_olt_d(reg_op1, reg_op2);
+			__ bc1t(L);
+
+			break;
+
+		case lir_cond_lessEqual:
+		case lir_cond_belowEqual:
+			if (un_jump)
+				__ c_ule_d(reg_op1, reg_op2);
+			else
+				__ c_ole_d(reg_op1, reg_op2);
+			__ bc1t(L);
+
+			break;
+			
+		case lir_cond_greaterEqual:
+		case lir_cond_aboveEqual:
+			if (un_jump)
+				__ c_olt_d(reg_op1, reg_op2);
+			else 
+				__ c_ult_d(reg_op1, reg_op2);
+			__ bc1f(L);
+			
+			break;
+			
+		case lir_cond_greater:
+			if (un_jump)
+				__ c_ole_d(reg_op1, reg_op2);
+			else 
+				__ c_ule_d(reg_op1, reg_op2);
+			__ bc1f(L);
+			
+			break;				
+			
+		default: 
+			ShouldNotReachHere();
+		}
+
+		__ delayed()->nop();
+  } else {
+    Unimplemented();
+	}
+}
+
+
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+  LIR_Opr value        = op->in_opr();
+  LIR_Opr src       = op->in_opr();
+ // RInfo dst            = op->result_opr()->rinfo();
+  LIR_Opr dest      = op->result_opr();
+  Bytecodes::Code code = op->bytecode();
+
+  switch (code) {
+    case Bytecodes::_i2l: 
+      move_regs(src->as_register(), dest->as_register_lo());
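+      // sign-extend: an arithmetic right shift of the low word by 31 replicates the
+      // sign bit into the high word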
+      __ sra (dest->as_register_hi(), dest->as_register_lo(), 31);
+      break;
+   
+        
+    case Bytecodes::_l2i:
+			move_regs (src->as_register_lo(), dest->as_register());
+      break;
+    case Bytecodes::_i2b:
+	move_regs (src->as_register(), dest->as_register());
+      __ sign_extend_byte(dest->as_register());
+      break;
+    case Bytecodes::_i2c:
+      __ andi(dest->as_register(), src->as_register(), 0xFFFF);
+      break;
+    case Bytecodes::_i2s:
+	move_regs (src->as_register(), dest->as_register());
+      __ sign_extend_short(dest->as_register());
+      break;
+   case Bytecodes::_f2d:
+	__ cvt_d_s(dest->as_double_reg(), src->as_float_reg());
+      break;
+    case Bytecodes::_d2f:
+	__ cvt_s_d(dest->as_float_reg(), src->as_double_reg());
+	break;
+    case Bytecodes::_i2f:
+	{
+		FloatRegister df = dest->as_float_reg();
+		if(src->is_single_cpu()) {
+			__ mtc1(src->as_register(), df);
+			__ cvt_s_w(df, df);
+		} else if (src->is_stack()) {
+			Address src_addr = src->is_single_stack()
+				? frame_map()->address_for_slot(src->single_stack_ix())
+				: frame_map()->address_for_slot(src->double_stack_ix()); 
+			__ lw(AT, src_addr);
+			__ mtc1(AT, df);
+			__ cvt_s_w(df, df);
+		} else {
+			Unimplemented();
+		}
+
+
+	}
+	break;
+   case Bytecodes::_i2d:
+	{	
+		FloatRegister dd = dest->as_double_reg();
+		if (src->is_single_cpu()) {
+			__ mtc1(src->as_register(), dd);
+			__ cvt_d_w(dd, dd);
+		} else if (src->is_stack()) {
+			Address src_addr = src->is_single_stack()
+				? frame_map()->address_for_slot(value->single_stack_ix())
+				: frame_map()->address_for_slot(value->double_stack_ix()); 
+			__ lw(AT, src_addr);
+			__ mtc1(AT, dd);
+			__ cvt_d_w(dd, dd);
+		} else {
+			Unimplemented();
+		}
+	}
+	break;
+   case Bytecodes::_f2i:
+	{
+		FloatRegister fval = src->as_float_reg();
+		Register dreg = dest->as_register();
+
+		Label L;
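+		// Java f2i: NaN converts to 0 (the move below sits in the branch delay slot
+		// and always executes); other values are truncated toward zero by trunc.w.s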
+		__ c_un_s(fval, fval);    //NaN?
+		__ bc1t(L);
+		__ delayed();
+		__ move(dreg, ZERO);
+
+		__ trunc_w_s(F30, fval);
+		__ mfc1(dreg, F30);
+		__ bind(L);
+	}
+	break;
+   case Bytecodes::_d2i: 
+	{
+		FloatRegister dval = src->as_double_reg();
+		Register dreg = dest->as_register();
+
+		Label L;
+		__ c_un_d(dval, dval);    //NaN?
+		__ bc1t(L);
+		__ delayed();
+		__ move(dreg, ZERO);
+
+		__ trunc_w_d(F30, dval);
+		__ mfc1(dreg, F30);
+		__ bind(L);
+	}
+	break;
+      case Bytecodes::_l2f:
+	{
+		FloatRegister ldf = dest->as_float_reg();
+		if (src->is_double_cpu()) {
+			__ mtc1(src->as_register_lo(), ldf);
+			__ mtc1(src->as_register_hi(), ldf + 1);
+			__ cvt_s_l(ldf, ldf);				
+		} else if (src->is_double_stack()) {
+			Address src_addr=frame_map()->address_for_slot(value->double_stack_ix());
+			__ lw(AT, src_addr);
+			__ mtc1(AT, ldf);
+			__ lw(AT, src_addr.base(), src_addr.disp() + 4);
+			__ mtc1(AT, ldf + 1);
+			__ cvt_s_l(ldf, ldf);
+		} else {
+			Unimplemented();
+		}
+	}
+	break;
+      case Bytecodes::_l2d:
+	{
+		FloatRegister ldd = dest->as_double_reg();
+		if (src->is_double_cpu()) {
+			__ mtc1(src->as_register_lo(), ldd);
+			__ mtc1(src->as_register_hi(), ldd + 1);
+			__ cvt_d_l(ldd, ldd);
+		} else if (src->is_double_stack()) {
+			Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
+			__ lw(AT, src_addr);
+			__ mtc1(AT, ldd);
+			__ lw(AT, src_addr.base(), src_addr.disp() + 4);
+			__ mtc1(AT, ldd + 1);
+			__ cvt_d_l(ldd, ldd);
+		} else {
+			Unimplemented();
+		}
+	}
+	break;
+
+      
+      case Bytecodes::_f2l: 
+	{
+		FloatRegister fval = src->as_float_reg();
+		Register dlo = dest->as_register_lo();
+		Register dhi = dest->as_register_hi();
+
+		Label L;
+		__ move(dhi, ZERO);
+		__ c_un_s(fval, fval);    //NaN?
+		__ bc1t(L);
+		__ delayed();
+		__ move(dlo, ZERO);
+
+		__ trunc_l_s(F30, fval);
+		__ mfc1(dlo, F30);
+		__ mfc1(dhi, F31);
+		__ bind(L);
+	}
+	break;
+      case Bytecodes::_d2l: 
+	{
+		FloatRegister dval = src->as_double_reg();
+		Register dlo = dest->as_register_lo();
+		Register dhi = dest->as_register_hi();
+
+		Label L;
+		__ move(dhi, ZERO);
+		__ c_un_d(dval, dval);    //NaN?
+		__ bc1t(L);
+		__ delayed();
+		__ move(dlo, ZERO);
+
+		__ trunc_l_d(F30, dval);
+		__ mfc1(dlo, F30);
+		__ mfc1(dhi, F31);
+		__ bind(L);
+	}
+	break;
+
+
+      default: ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+	if (op->init_check()) {
+		//	__ cmpl(Address(op->klass()->as_register(),
+		//	instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)),
+		//	instanceKlass::fully_initialized);
+		add_debug_info_for_null_check_here(op->stub()->info());
+		//	__ jcc(Assembler::notEqual, *op->stub()->entry());
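+		// MIPS form of the cmpl/jcc above: load init_state, subtract fully_initialized
+		// and branch to the slow-path stub if the klass is not yet fully initialized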
+		__ lw(AT,Address(op->klass()->as_register(),instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)));
+                __ addi(AT, AT, -instanceKlass::fully_initialized);		
+		__ bne(AT,ZERO,*op->stub()->entry());
+		__ delayed()->nop();
+	}
+	__ allocate_object(
+			op->obj()->as_register(), 
+			op->tmp1()->as_register(),
+			op->tmp2()->as_register(),
+			op->header_size(),
+			op->object_size(),
+			op->klass()->as_register(),
+			*op->stub()->entry());
+
+	__ bind(*op->stub()->continuation());
+}
+
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+
+
+	if (UseSlowPath ||
+		(!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
+		(!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
+		__ b(*op->stub()->entry());
+		__ delayed()->nop();  
+	} else {
+		Register len =  op->len()->as_register();
+		Register tmp1 = op->tmp1()->as_register();
+		Register tmp2 = op->tmp2()->as_register();
+		Register tmp3 = op->tmp3()->as_register();
+		__ allocate_array(op->obj()->as_register(),
+				len,
+				tmp1,
+				tmp2,
+				tmp3,
+				arrayOopDesc::header_size(op->type()),
+				array_element_size(op->type()),
+				op->klass()->as_register(),
+				*op->stub()->entry());
+	}
+	__ bind(*op->stub()->continuation());
+}
+
+
+
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+	LIR_Code code = op->code();
+	if (code == lir_store_check) {
+		Register value = op->object()->as_register();
+		Register array = op->array()->as_register();
+		Register k_RInfo = op->tmp1()->as_register();
+		Register klass_RInfo = op->tmp2()->as_register();
+
+		CodeStub* stub = op->stub();
+		Label done;
+
+		__ beq(value, ZERO, done);
+		__ delayed()->nop();
+		add_debug_info_for_null_check_here(op->info_for_exception());
+		
+		__ lw(k_RInfo, array, oopDesc::klass_offset_in_bytes());
+		__ lw(klass_RInfo, value, oopDesc::klass_offset_in_bytes());
+
+		__ lw(k_RInfo, k_RInfo,  objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)); 
+		// get super_check_offset
+		// two nops to work around a SIGBUS here, FIXME, Jerome
+		__ nop();
+		__ nop();
+		__ lw(T9, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
+
+		// See if we get an immediate positive hit
+		__ add(AT, klass_RInfo, T9);
+		__ lw(AT, AT, 0);
+		__ beq(k_RInfo, AT, done);
+		__ delayed()->nop();
+
+		// check for immediate negative hit
+		__ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+		__ bne(T9, AT, *stub->entry());			// fail
+		__ delayed()->nop();
+
+		// check for self
+		__ beq(klass_RInfo, k_RInfo, done);
+		__ delayed()->nop();
+
+		// super type array
+		__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
+		// length
+		__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
+
+		// base
+		__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+		Label miss, hit, loop;
+		// T9:count, T8:base, k_RInfo: super klass
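+		// linear scan of the secondary_supers array; on a hit the super klass is
+		// cached in secondary_super_cache so the fast check above succeeds next time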
+		__ bind(loop);
+		__ beq(T9, ZERO, miss);
+		__ delayed()->lw(AT, T8, 0);
+		__ beq(AT, k_RInfo, hit);
+		__ delayed();
+		__ addiu(T9, T9, -1);
+		__ b(loop);
+		__ delayed();
+		__ addi(T8, T8, 1 * wordSize);
+
+		__ bind(miss);
+		__ b(*stub->entry());
+		__ delayed()->nop();
+
+		__ bind(hit);
+		__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) 
+				+ Klass::secondary_super_cache_offset_in_bytes());
+
+		__ bind(done);
+	} else if (op->code() == lir_checkcast) {
+		// we always need a stub for the failure case.
+		CodeStub* stub = op->stub();
+		Register obj = op->object()->as_register();
+		Register k_RInfo = op->tmp1()->as_register();
+		Register klass_RInfo = op->tmp2()->as_register();
+		Register dst = op->result_opr()->as_register();
+		ciKlass* k = op->klass();
+		Register Rtmp1 = noreg; 
+		Label done;
+		if (obj == k_RInfo) {
+			k_RInfo = dst;
+		} else if (obj == klass_RInfo) {
+			klass_RInfo = dst;
+		}
+		if (k->is_loaded()) {
+			select_different_registers(obj, dst, k_RInfo, klass_RInfo);
+		} else {
+			Rtmp1 = op->tmp3()->as_register();
+			select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
+		}
+		assert_different_registers(obj, k_RInfo, klass_RInfo);
+
+		// patching may screw with our temporaries on sparc,
+		// so let's do it before loading the class
+		if (!k->is_loaded()) {
+			jobject2reg_with_patching(k_RInfo, op->info_for_patch());
+		} else {
+			//ciObject2reg(k, k_RInfo);
+			jobject2reg(k->encoding(),k_RInfo); 
+		}
+		assert(obj != k_RInfo, "must be different");
+		if (op->profiled_method() != NULL) {
+			ciMethod* method = op->profiled_method();
+			int bci          = op->profiled_bci();
+
+			Label profile_done;
+			//  __ jcc(Assembler::notEqual, profile_done);
+			__ bne(obj, ZERO, profile_done);
+			__ delayed()->nop();
+
+			// Object is null; update methodDataOop
+			ciMethodData* md = method->method_data();
+			if (md == NULL) {
+				bailout("out of memory building methodDataOop");
+				return;
+			}
+			ciProfileData* data = md->bci_to_data(bci);
+			assert(data != NULL,       "need data for checkcast");
+			assert(data->is_BitData(), "need BitData for checkcast");
+			Register mdo  = klass_RInfo;
+			//  __ movl(mdo, md->encoding());
+			//__ move(mdo, md->encoding());
+			int oop_index = __ oop_recorder()->find_index(md->encoding());
+			RelocationHolder rspec = oop_Relocation::spec(oop_index);
+			__ relocate(rspec);
+			__ lui(mdo, Assembler::split_high((int)md->encoding()));
+			__ addiu(mdo, mdo, Assembler::split_low((int)md->encoding()));
+
+
+
+			Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
+			//FIXME: it is very inefficient to replace orl with 3 MIPS instructions @jerome, 12/27,06
+			//__ orl(data_addr, BitData::null_flag_constant());
+			int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
+			__ lw(AT, data_addr); 
+			__ ori(AT,AT, header_bits); 
+			__ sw(AT,data_addr); 
+			__ b(done);
+			__ delayed()->nop(); 
+			__ bind(profile_done);
+		} else {
+			//   __ jcc(Assembler::equal, done);
+			__ beq(obj, ZERO, done);
+			__ delayed()->nop();
+
+		}
+		__ verify_oop(obj);
+
+		//	__ beq(obj, ZERO, done);
+		//	__ delayed()->nop();
+
+		if (op->fast_check()) {
+			// get object class
+			// not a safepoint as obj null check happens earlier
+			__ lw(AT, obj,  oopDesc::klass_offset_in_bytes());
+			__ bne(AT, k_RInfo, *stub->entry());
+			__ delayed()->nop();
+			__ bind(done);
+		} else {
+			// get object class
+			// not a safepoint as obj null check happens earlier
+			__ lw(klass_RInfo, obj, oopDesc::klass_offset_in_bytes());
+			if (k->is_loaded()) {
+				__ lw(AT, klass_RInfo, k->super_check_offset());
+				// See if we get an immediate positive hit
+				if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
+					__ bne(AT, k_RInfo, *stub->entry());
+					__ delayed()->nop();
+				} else {
+					// See if we get an immediate positive hit
+					__ beq(AT, k_RInfo, done);
+					__ delayed()->nop();
+					// check for self
+					__ beq(klass_RInfo, k_RInfo, done);
+					__ delayed()->nop();
+
+					// array
+					__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
+					// length
+					__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
+
+					// base
+					__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+					Label miss, hit, loop;
+					// T9:count, T8:base, k_RInfo: super klass
+					__ bind(loop);
+					__ beq(T9, ZERO, miss);
+					__ delayed()->lw(AT, T8, 0);
+					__ beq(AT, k_RInfo, hit);
+					__ delayed();
+					__ addiu(T9, T9, -1);
+					__ b(loop);
+					__ delayed();
+					__ addi(T8, T8, 1 * wordSize);
+
+					__ bind(miss);
+					__ b(*stub->entry());
+					__ delayed()->nop();
+
+					__ bind(hit);
+					__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+				}
+				__ bind(done);
+			} else {
+				//   assert(dst != obj, "need different registers so we have a temporary");
+      //  assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
+       
+				// super_check_offset
+       // __ lw(T9, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
+        __ lw(Rtmp1, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
+        // See if we get an immediate positive hit
+				__ add(AT, klass_RInfo, Rtmp1);
+				__ lw(AT, AT, 0);
+				__ beq(k_RInfo, AT, done);
+				__ delayed()->nop();
+        // check for immediate negative hit
+				__ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());		
+				__ bne(Rtmp1, AT,  *stub->entry());
+				__ delayed()->nop();
+        // check for self
+				__ beq(klass_RInfo, k_RInfo, done);
+				__ delayed()->nop();
+        
+				// array
+				__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
+				// length
+				__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
+
+				// base
+				__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+				Label miss, hit, loop;
+				// T9:count, T8:base, k_RInfo: super klass
+				__ bind(loop);
+				__ beq(T9, ZERO, miss);
+				__ delayed()->lw(AT, T8, 0);
+				__ beq(AT, k_RInfo, hit);
+				__ delayed();
+				__ addiu(T9, T9, -1);
+				__ b(loop);
+				__ delayed();
+				__ addi(T8, T8, 1 * wordSize);
+
+				__ bind(miss);
+				__ b(*stub->entry());
+				__ delayed()->nop();
+
+				__ bind(hit);
+				__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+        __ bind(done);
+      }
+      
+    }
+    if (dst != obj) __ move(dst, obj);
+	
+  } else if (code == lir_instanceof) {
+    Register obj = op->object()->as_register();
+    Register k_RInfo = op->tmp1()->as_register();
+    Register klass_RInfo = op->tmp2()->as_register();
+    Register dst = op->result_opr()->as_register();
+    ciKlass* k = op->klass();
+    
+    Label done;
+    Label zero;
+    Label one;
+    
+    if (obj == k_RInfo) {
+	k_RInfo = klass_RInfo;
+	klass_RInfo = obj;
+    }
+
+    // patching may screw with our temporaries on sparc,
+    // so let's do it before loading the class
+    if (!k->is_loaded()) {
+      jobject2reg_with_patching(k_RInfo, op->info_for_patch());
+    } else {
+		//	ciObject2reg(k, k_RInfo);
+			jobject2reg(k->encoding(), k_RInfo);
+		}
+
+    assert(obj != k_RInfo, "must be different");
+    __ verify_oop(obj);
+    __ beq(obj, ZERO, zero);
+    __ delayed()->nop();
+
+    if (op->fast_check()) {
+      // get object class
+      // not a safepoint as obj null check happens earlier
+			__ lw(AT, obj, oopDesc::klass_offset_in_bytes());
+			__ beq(AT, k_RInfo, one);
+			__ delayed()->nop();
+    } else {
+      // get object class
+      // not a safepoint as obj null check happens earlier
+      __ lw(klass_RInfo, obj, oopDesc::klass_offset_in_bytes());
+      if (k->is_loaded()) {
+      //  assert(dst != obj, "need different registers so we have a temporary");
+        
+        // See if we get an immediate positive hit
+				__ lw(AT, klass_RInfo, k->super_check_offset());
+				__ beq(AT, k_RInfo, one);
+				__ delayed()->nop();
+        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() == k->super_check_offset()) {
+          // check for self
+					//ciObject2reg(k, AT);
+		jobject2reg(k->encoding(), AT);
+				
+		__ beq(klass_RInfo, k_RInfo, one);
+		__ delayed()->nop();
+					
+					// array
+					__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
+					// length
+					__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
+
+					// base
+					__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+					Label loop, hit;
+					// T9:count, T8:base, k_RInfo: super klass
+					__ bind(loop);
+					__ beq(T9, ZERO, zero);
+					__ delayed()->lw(AT, T8, 0);
+					__ beq(AT, k_RInfo, hit);
+					__ delayed();
+					__ addiu(T9, T9, -1);
+					__ b(loop);
+					__ delayed();
+					__ addi(T8, T8, 1 * wordSize);
+
+					__ bind(hit);
+					__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+					__ b(one);
+					__ delayed()->nop();
+        }
+      } else {
+        assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
+        
+        __ lw(T9, k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
+				__ add(AT, klass_RInfo, T9);
+				__ lw(AT, AT, 0);
+				__ beq(k_RInfo, AT, one);
+				__ delayed()->nop();
+        // check for immediate negative hit
+				__ move(AT, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+				__ bne(AT, T9, zero);
+				__ delayed()->nop();
+        // check for self
+				__ beq(klass_RInfo, k_RInfo, one);
+				__ delayed()->nop();
+				
+				// array
+				__ lw(T8, klass_RInfo, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
+				// length
+				__ lw(T9, T8, arrayOopDesc::length_offset_in_bytes());
+
+				// base
+				__ addi(T8, T8, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+				Label loop, hit;
+				// T9:count, T8:base, k_RInfo: super klass
+				__ bind(loop);
+				__ beq(T9, ZERO, zero);
+				__ delayed()->lw(AT, T8, 0);
+				__ beq(AT, k_RInfo, hit);
+				__ delayed();
+				__ addi(T9, T9, -1);
+				__ b(loop);
+				__ delayed();
+				__ addi(T8, T8, 1 * wordSize);
+
+				__ bind(hit);
+				__ sw(k_RInfo, klass_RInfo, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+				__ b(one);
+				__ delayed()->nop();
+			}
+    }
+    __ bind(zero);
+    __ move(dst, ZERO);
+    __ b(done);
+		__ delayed()->nop();
+
+    __ bind(one);
+    __ move(dst, 1);
+
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  if (op->code() == lir_cas_long) {
+    Register addr = op->addr()->as_register();
+    if (os::is_MP()) {}    
+    __ cmpxchg8(op->new_value()->as_register_lo(), 
+				op->new_value()->as_register_hi(),				
+				addr,
+				op->cmp_value()->as_register_lo(),
+				op->cmp_value()->as_register_hi());
+
+  } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj) {
+    Register addr = op->addr()->as_register();
+    Register newval = op->new_value()->as_register();
+    Register cmpval = op->cmp_value()->as_register();
+    assert(newval != NULL, "new val must be register");
+    assert(cmpval != newval, "cmp and new values must be in different registers");
+    assert(cmpval != addr, "cmp and addr must be in different registers");
+    assert(newval != addr, "new value and addr must be in different registers");
+    if (os::is_MP()) {
+    }
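+    // no lock prefix or extra barrier is emitted on MP here: the MIPS cmpxchg
+    // below is presumably built from ll/sc, which already provides the atomicity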
+		__ cmpxchg(newval, addr, cmpval);
+  } else {
+    Unimplemented();
+  }
+}
+
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result) {
+}
+
+
+
+
+
+//void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info) {
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info,bool pop_fpu_stack) {
+  assert(info == NULL || ((code == lir_rem || code == lir_div || code == lir_sub) && right->is_double_cpu()), "info is only for ldiv/lrem");
+  if (left->is_double_cpu()) {
+    assert(right->is_double_cpu(),"right must be long");
+    assert(dest->is_double_cpu(), "dest must be long");
+    
+    Register op1_lo = left->as_register_lo();
+    Register op1_hi = left->as_register_hi();
+    Register op2_lo = right->as_register_lo();
+    Register op2_hi = right->as_register_hi();
+    Register dst_lo = dest->as_register_lo();
+    Register dst_hi = dest->as_register_hi();
+
+    switch (code) {
+		case lir_add:
+	//		assert_different_registers(dst_lo, op1_lo, op2_lo, op1_hi, op2_hi);
+//			assert_different_registers( op1_lo, op2_lo, op1_hi, op2_hi);
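+			// 64-bit add: sltu captures the carry out of the low-word addition
+			// so it can be added into the high word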
+			__ addu(dst_lo, op1_lo, op2_lo);
+			__ sltu(AT, dst_lo, op2_lo);
+			__ addu(dst_hi, op1_hi, op2_hi);
+			__ addu(dst_hi, dst_hi, AT);
+			break;
+			
+		case lir_sub:
+//			assert_different_registers(dst_lo, op1_lo, op2_lo, op1_hi, op2_hi);
+//			assert_different_registers( op1_lo, op2_lo, op1_hi, op2_hi);
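+			// 64-bit subtract: sltu detects the borrow from the low word
+			// (dst_lo wrapped above op1_lo) and subtracts it from the high word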
+			__ subu(dst_lo, op1_lo, op2_lo);
+			__ sltu(AT, op1_lo, dst_lo);
+			__ subu(dst_hi, op1_hi, op2_hi);
+			__ subu(dst_hi, dst_hi, AT);
+			break;
+			
+		case lir_mul:
+//			assert_different_registers(dst_lo, dst_hi, op1_lo, op2_hi);
+			{
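+				// 64x64->64 multiply: dst_lo = lo(op1_lo*op2_lo) and
+				// dst_hi = op1_hi*op2_lo + op1_lo*op2_hi + hi(op1_lo*op2_lo);
+				// the "zero" and "quick" paths skip the cross products when possible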
+				Label zero, quick, done;
+
+				//zero?
+				__ orr(AT, op2_lo, op1_lo);
+				__ beq(AT, ZERO, zero);
+				__ delayed(); 
+				__ move(dst_hi, ZERO);
+
+				//quick?
+				__ orr(AT, op2_hi, op1_hi);
+				__ beq(AT, ZERO, quick);
+				__ delayed()->nop();
+
+				__ multu(op2_lo, op1_hi);
+				__ nop();
+				__ nop();
+				__ mflo(dst_hi);	
+				__ multu(op2_hi, op1_lo);
+				__ nop();
+				__ nop();
+				__ mflo(AT);
+
+				__ bind(quick);
+				__ multu(op2_lo, op1_lo);
+				__ addu(dst_hi, dst_hi, AT);
+				__ nop();
+				__ mflo(dst_lo);
+				__ mfhi(AT);
+				__ b(done);
+				__ delayed()->addu(dst_hi, dst_hi, AT);
+
+				__ bind(zero);
+				__ move(dst_lo, ZERO);
+				__ bind(done);
+			}
+			break;
+//FIXME, where is the new div and rem?
+/*		case lir_div_strictfp:	
+		case lir_div:
+      __ call(Runtime1::entry_for(Runtime1::ldiv_stub_id), relocInfo::runtime_call_type);
+			__ delayed()->nop();
+      add_call_info(code_offset(), info);
+			
+			if ( dst_lo != V0 ) {
+				__ move(dst_lo, V0);
+			}
+			
+			if ( dst_hi != V1) {
+				__ move(dst_hi, V1);
+			}
+
+			break;
+*/
+			/*			
+		case lir_rem:
+      __ call(Runtime1::entry_for(Runtime1::lrem_stub_id), relocInfo::runtime_call_type);
+			__ delayed()->nop();
+      add_call_info(code_offset(), info);
+
+			if ( dst_lo != V0 ) {
+				__ move(dst_lo, V0);
+			}
+			
+			if ( dst_hi != V1) {
+				__ move(dst_hi, V1);
+			}
+
+			break;
+ */   
+		default:
+      ShouldNotReachHere();
+ 		}
+
+  } else if (left->is_single_cpu()) {
+    Register lreg = left->as_register();
+    Register res = dest->as_register();
+     
+		if (right->is_single_cpu()) {
+			Register rreg = right->as_register();
+			switch (code) {
+			case lir_add: 
+				__ addu(res, lreg, rreg);  
+				break;						
+			case lir_mul: 
+				__ mult(lreg, rreg);
+				__ nop();
+				__ nop();
+				__ mflo(res);
+				break;
+			case lir_sub: 
+				__ subu(res, lreg, rreg);  
+				break;
+			default:      
+				ShouldNotReachHere();
+			}
+    } else if (right->is_constant()) {
+      jint c = right->as_constant_ptr()->as_jint();
+
+			switch (code) {
+		        case lir_mul_strictfp:	
+			case lir_mul:
+				__ move(AT, c);
+				__ mult(lreg, AT);
+				__ nop();
+				__ nop();
+				__ mflo(res);
+				break;
+				
+			case lir_add:
+				if (Assembler::is_simm16(c)) {
+					__ addiu(res, lreg, c);
+				} else {
+					__ move(AT, c);
+					__ addu(res, lreg, AT);
+				}
+				break;
+
+			case lir_sub:
+				if (Assembler::is_simm16(-c)) {
+					__ addiu(res, lreg, -c);
+				} else {
+					__ move(AT, c);
+					__ subu(res, lreg, AT);
+				}
+				break;
+
+			default: 
+				ShouldNotReachHere();
+			}
+
+		} else {
+			ShouldNotReachHere();
+		}
+	} else if (left->is_single_fpu()) {
+    assert(right->is_single_fpu(),"right must be float");
+    assert(dest->is_single_fpu(), "dest must be float");
+
+		FloatRegister lreg = left->as_float_reg();
+		FloatRegister rreg = right->as_float_reg();
+		FloatRegister res = dest->as_float_reg();
+		
+		switch (code) {
+		case lir_add: 
+			__ add_s(res, lreg, rreg);            
+			break;
+		case lir_sub: 
+			__ sub_s(res, lreg, rreg);          
+			break;
+		case lir_mul: 
+		case lir_mul_strictfp:
			// I don't think we need special handling of this. FIXME
+			__ mul_s(res, lreg, rreg);
+			break;
+		case lir_div: 
+		case lir_div_strictfp:
+			__ div_s(res, lreg, rreg);
+			break;
+		case lir_rem: 
+			__ rem_s(res, lreg, rreg); 
+			break;
+		default     : ShouldNotReachHere();
+		}
+	} else if (left->is_double_fpu()) {
+    assert(right->is_double_fpu(),"right must be double");
+    assert(dest->is_double_fpu(), "dest must be double");
+
+		FloatRegister lreg = left->as_double_reg();
+		FloatRegister rreg = right->as_double_reg();
+		FloatRegister res = dest->as_double_reg();
+		
+		switch (code) {
+		case lir_add: 
+			__ add_d(res, lreg, rreg);            
+			break;
+		case lir_sub: 
+			__ sub_d(res, lreg, rreg);          
+			break;
+		case lir_mul: 
+		case lir_mul_strictfp:
+			// I don't think we need special handling of this. FIXME
+			// by yjl 9/13/2005
+			__ mul_d(res, lreg, rreg);
+			break;
+		case lir_div: 
+		case lir_div_strictfp:
+			__ div_d(res, lreg, rreg);
+			break;
+//		case lir_rem: 
+//			__ rem_d(res, lreg, rreg); 
+//			break;
+		default     : ShouldNotReachHere();
+		}
+	}
+  else if (left->is_single_stack() || left->is_address()) {
+    assert(left == dest, "left and dest must be equal");
+    Address laddr = left->is_single_stack() ? frame_map()->address_for_slot(left->single_stack_ix())
+                                            : as_Address(left->as_address_ptr());
+   /* if (left->is_single_stack()) {
+       laddr = frame_map()->address_for_slot(left->single_stack_ix());
+    } else if (left->is_address()) {
+      laddr = as_Address(left->as_address_ptr());
+    } else {
+      ShouldNotReachHere();
+    }
+*/
+    if (right->is_single_cpu()) {
+      Register rreg = right->as_register();
+      switch (code) {
+        case lir_add: //__ addl(laddr, rreg); 
+	        __ lw(AT, laddr);
+		__ add(AT,AT,rreg);
+	        __ sw(AT, laddr);	
+		break;
+        case lir_sub: 
+	//	__ subl(laddr, rreg); 
+		__ lw(AT, laddr);
+		__ sub(AT,AT,rreg);
+	        __ sw(AT, laddr);	
+		
+		break;
+        default:      ShouldNotReachHere();
+      }
+    } else if (right->is_constant()) {
+	    jint c = right->as_constant_ptr()->as_jint();
+	    switch (code) {
+		    case lir_add: {
+					  //  switch (c) {
+					  //    case  1: __ incl(laddr);    break;
+					  //   case -1: __ decl(laddr);    break;
+					  //  default: __ addl(laddr, c); break;
+					  // FIXME: if c does not fit in a signed 16-bit immediate, addi will be wrong here
+					  __ lw(AT, laddr); 
+					  __ addi(AT, AT, c); 
+					  __ sw(AT, laddr); 
+					  //}
+					  break;
+				  }
+		    case lir_sub: {
+					  //  switch (c) 
+					  // {
+					  // case  1: __ decl(laddr);    break;
+					  // case -1: __ incl(laddr);    break;
+					  //default: __ subl(laddr, c); break;
+					  __ lw(AT, laddr); 
+					  __ addi(AT, AT, -c);
+					  __ sw(AT, laddr);
+					  //}
+					  break;
+				  }
+		    default: ShouldNotReachHere();
+	    }
+    } else {
+	    ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op *op) {
+// FIXME: lir_log, lir_log10, lir_abs, lir_sqrt and other new LIR instructions  @jerome
+  if (value->is_double_fpu()) {
+   // assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
+    switch(code) {
+      case lir_log   : //__ flog() ; break;
+      case lir_log10 : //__ flog10() ; 
+               Unimplemented();
+	      break;
+      case lir_abs   : __ abs_d(dest->as_double_reg(), value->as_double_reg()) ; break;
+      case lir_sqrt  : __ sqrt_d(dest->as_double_reg(), value->as_double_reg()); break;
+      case lir_sin   :
+        // Should consider not saving ebx if not necessary
+        __ trigfunc('s', 0);
+        break;
+      case lir_cos :
+        // Should consider not saving ebx if not necessary
+       // assert(op->as_Op2()->fpu_stack_size() <= 6, "sin and cos need two free stack slots");
+        __ trigfunc('c', 0);
+        break;
+      case lir_tan :
+        // Should consider not saving ebx if not necessary
+        __ trigfunc('t', 0);
+        break;
+      default      : ShouldNotReachHere();
+    }
+  } else {
+    Unimplemented();
+  }
+}
+//FIXME, if right is on the stack!
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+	if (left->is_single_cpu()) {
+		Register dstreg = dst->as_register();
+		Register reg = left->as_register();
+		if (right->is_constant()) {
+			int val = right->as_constant_ptr()->as_jint();
+			__ move(AT, val);
+			switch (code) {
+			case lir_logic_and: 
+				__ andr (dstreg, reg, AT); 
+				break;
+		//	case lir_logic_orcc: // fall through
+			case lir_logic_or:  
+				__ orr(dstreg, reg, AT);
+				break;
+			case lir_logic_xor: 
+				__ xorr(dstreg, reg, AT);
+				break;
+			default: ShouldNotReachHere();
+			}
+		} else if (right->is_stack()) {
+			// added support for stack operands
+			Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
+			switch (code) {
+				case lir_logic_and: //__ andl (reg, raddr); 
+					__ lw(AT,raddr); 
+					__ andr (reg, reg,AT); 
+					break;
+				case lir_logic_or:  
+					// __ orl  (reg, raddr); 
+					__ lw(AT,raddr);	
+					__ orr (reg, reg,AT); 
+					break;
+				case lir_logic_xor:
+					// __ xorl (reg, raddr); 
+					__ lw(AT,raddr);
+					__ xorr(reg,reg,AT);
+					break;
+				default: ShouldNotReachHere();
+			}
+		} else {
+			Register rright = right->as_register();
+			switch (code) {
+				case lir_logic_and: __ andr (dstreg, reg, rright); break;
+						    //case lir_logic_orcc: // fall through
+				case lir_logic_or : __ orr  (dstreg, reg, rright); break;
+				case lir_logic_xor: __ xorr (dstreg, reg, rright); break;
+				default: ShouldNotReachHere();
+			}
+		}
+	} else {
+		Register l_lo = left->as_register_lo();
+		Register l_hi = left->as_register_hi();
+		Register dst_lo = dst->as_register_lo();
+		Register dst_hi = dst->as_register_hi();
+
+		if (right->is_constant()) {
+//			assert_different_registers(l_lo, l_hi, dst_lo, dst_hi);
+
+			int r_lo = right->as_constant_ptr()->as_jint_lo();
+			int r_hi = right->as_constant_ptr()->as_jint_hi();
+
+			switch (code) {
+				case lir_logic_and:
+					__ move(AT, r_lo);					
+					__ andr(dst_lo, l_lo, AT);
+					__ move(AT, r_hi);
+					__ andr(dst_hi, l_hi, AT);
+					break;
+
+				case lir_logic_or:
+					__ move(AT, r_lo);					
+					__ orr(dst_lo, l_lo, AT);
+					__ move(AT, r_hi);
+					__ orr(dst_hi, l_hi, AT);
+					break;
+
+				case lir_logic_xor:
+					__ move(AT, r_lo);					
+					__ xorr(dst_lo, l_lo, AT);
+					__ move(AT, r_hi);
+					__ xorr(dst_hi, l_hi, AT);
+					break;
+
+				default: ShouldNotReachHere();
+			}
+
+		} else {
+			Register r_lo = right->as_register_lo();
+			Register r_hi = right->as_register_hi();
+#if 0
+			assert_different_registers(l_lo, l_hi, dst_lo, dst_hi, r_lo, r_hi);
+#endif
+
+			switch (code) {
+				case lir_logic_and: 
+					__ andr(dst_lo, l_lo, r_lo);
+					__ andr(dst_hi, l_hi, r_hi);
+					break;
+				case lir_logic_or:
+					__ orr(dst_lo, l_lo, r_lo);
+					__ orr(dst_hi, l_hi, r_hi);
+					break;
+				case lir_logic_xor:
+					__ xorr(dst_lo, l_lo, r_lo);
+					__ xorr(dst_hi, l_hi, r_hi);
+					break;
+				default: ShouldNotReachHere();
+			}
+		}
+	}
+}
+
+// div/rem results come from the HI/LO registers; AT is used as a scratch for constant divisors
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
+
+	assert(left->is_single_cpu(),   "left must be register");
+	assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
+	assert(result->is_single_cpu(), "result must be register");
+
+	Register lreg = left->as_register();
+	Register dreg = result->as_register();
+
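+	// after each div below, two nops are inserted, presumably to keep the later
+	// mfhi/mflo reads clear of the divide's HI/LO hazard window on the target core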
+	if (right->is_constant()) {
+		int divisor = right->as_constant_ptr()->as_jint();
+		assert(divisor!=0, "must be nonzero");
+		__ move(AT, divisor);
+		__ div(lreg, AT);
+		__ nop();
+		__ nop();
+	} else {
+		Register rreg = right->as_register();
+		int idivl_offset = code_offset();
+		__ div(lreg, rreg);
+		__ nop();
+		__ nop();
+		add_debug_info_for_div0(idivl_offset, info);
+	}
+
+	// get the result
+	if (code == lir_irem) {
+		__ mfhi(dreg);
+	} else if (code == lir_idiv) {
+		__ mflo(dreg);
+	} else {
+		ShouldNotReachHere();
+	}
+}
+
+
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst,LIR_Op2 * op) {	
+	Register dstreg = dst->as_register();
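+	// lir_cmp_fd2i uses the ordered compare c_olt, so an unordered (NaN) operand
+	// falls through to the +1 result; lir_ucmp_fd2i uses c_ult and yields -1 instead,
+	// i.e. fcmpg/dcmpg versus fcmpl/dcmpl semantics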
+	if (code == lir_cmp_fd2i) {
+		if (left->is_single_fpu()) {			
+			FloatRegister leftreg = left->as_float_reg();
+			FloatRegister rightreg = right->as_float_reg();
+
+			Label done;
+			// equal?
+			__ c_eq_s(leftreg, rightreg);
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, ZERO);
+			// less?
+			__ c_olt_s(leftreg, rightreg); 
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, -1);
+			// great
+			__ move(dstreg, 1);
+
+			__ bind(done);			
+		} else {
+			assert(left->is_double_fpu(), "Must double");
+			FloatRegister leftreg = left->as_double_reg();
+			FloatRegister rightreg = right->as_double_reg();
+
+			Label done;
+			// equal?
+			__ c_eq_d(leftreg, rightreg);
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, ZERO);
+			// less?
+			__ c_olt_d(leftreg, rightreg);
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, -1);
+			// great
+			__ move(dstreg, 1);
+
+			__ bind(done);			
+		}
+	} else if (code == lir_ucmp_fd2i) {
+		if (left->is_single_fpu()) {			
+			FloatRegister leftreg = left->as_float_reg();
+			FloatRegister rightreg = right->as_float_reg();
+
+			Label done;
+			// equal?
+			__ c_eq_s(leftreg, rightreg);
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, ZERO);
+			// less?
+			__ c_ult_s(leftreg, rightreg); 
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, -1);
+			// great
+			__ move(dstreg, 1);
+
+			__ bind(done);			
+		} else {
+			assert(left->is_double_fpu(), "Must double");
+			FloatRegister leftreg = left->as_double_reg();
+			FloatRegister rightreg = right->as_double_reg();
+
+			Label done;
+			// equal?
+			__ c_eq_d(leftreg, rightreg);
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, ZERO);
+			// less?
+			__ c_ult_d(leftreg, rightreg);
+			__ bc1t(done);
+			__ delayed();
+			__ move(dstreg, -1);
+			// great
+			__ move(dstreg, 1);
+
+			__ bind(done);			
+		}
+	} else {
+		assert(code == lir_cmp_l2i, "check");
+		Register l_lo, l_hi, r_lo, r_hi, d_lo, d_hi;
+		l_lo = left->as_register_lo();
+		l_hi = left->as_register_hi();
+		r_lo = right->as_register_lo();
+		r_hi = right->as_register_hi();
+
+		Label done;
+		// less?
+		__ slt(AT, l_hi, r_hi);		
+		__ bne(AT, ZERO, done);
+		__ delayed();
+		__ move(dstreg, -1);
+		// great?
+		__ slt(AT, r_hi, l_hi);
+		__ bne(AT, ZERO, done);
+		__ delayed();
+		__ move(dstreg, 1);
+
+		// now compare low 32 bits
+		// below?
+		__ sltu(AT, l_lo, r_lo);
+		__ bne(AT, ZERO, done);
+		__ delayed();
+		__ move(dstreg, -1);
+		// above?
+		__ sltu(AT, r_lo, l_lo);
+		__ bne(AT, ZERO, done);
+		__ delayed();
+		__ move(dstreg, 1);
+		// equal
+		__ move(dstreg, ZERO);
+
+		__ bind(done);
+	}
+}
+
+
+void LIR_Assembler::align_call(LIR_Code code) {
+	if (os::is_MP()) {
+		// make sure that the displacement word of the call ends up word aligned
+		int offset = __ offset();
+		switch (code) {
+			case lir_static_call:  
+			case lir_optvirtual_call: 
+				offset += NativeCall::displacement_offset;
+				break;
+			case lir_icvirtual_call:
+				offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
+				break;
+			case lir_virtual_call:  // currently, sparc-specific for niagara
+			default: ShouldNotReachHere();
+		}
+		while (offset++ % BytesPerWord != 0) {
+			__ nop();
+		}
+	}
+
+}
+
+
+void LIR_Assembler::call(address entry, relocInfo::relocType rtype, CodeEmitInfo* info) {
+  	assert(!os::is_MP() || (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
+         	"must be aligned");
+  
+  	__ call(entry, rtype);
+  	__ delayed()->nop();
+  	add_call_info(code_offset(), info);
+}
+
+
+void LIR_Assembler::ic_call(address entry, CodeEmitInfo* info) {
+	RelocationHolder rh = virtual_call_Relocation::spec(pc());
+	int oop_index = __ oop_recorder()->allocate_index((jobject)Universe::non_oop_word());
+	RelocationHolder rspec = oop_Relocation::spec(oop_index);
+	__ relocate(rspec);
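+	// load the inline-cache placeholder (Universe::non_oop_word()) into IC_Klass;
+	// it is patched with the real cached oop when the inline cache is resolved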
+	__ lui(IC_Klass, Assembler::split_high((int)Universe::non_oop_word()));
+	__ addiu(IC_Klass, IC_Klass, Assembler::split_low((int)Universe::non_oop_word()));
+
+	__ call(entry, rh);
+	__ delayed()->nop();
+	add_call_info(code_offset(), info);
+}
+
+
+/* Currently, vtable-dispatch is only enabled for sparc platforms */
+void LIR_Assembler::vtable_call(int vtable_offset, CodeEmitInfo* info) {
+    ShouldNotReachHere();
+}
+
+
+
+void LIR_Assembler::emit_static_call_stub() {
+	address call_pc = __ pc();
+	address stub = __ start_a_stub(call_stub_size);
+	if (stub == NULL) {
+		bailout("static call stub overflow");
+		return;
+	}
+
+	int start = __ offset();
+	/*  if (os::is_MP()) {
+	// make sure that the displacement word of the call ends up word aligned
+	int offset = __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset;
+	while (offset++ % BytesPerWord != 0) {
+	__ nop();
+	}
+	}
+	*/
+	__ relocate(static_stub_Relocation::spec(call_pc));
+	//__ movl(ebx, (jobject)NULL);
+	//__ lw(T7, (jobject)NULL);
+//	__ move(T7, ZERO);  
+	jobject o=NULL;    
+	int oop_index = __ oop_recorder()->allocate_index((jobject)o);
+	RelocationHolder rspec = oop_Relocation::spec(oop_index);
+	__ relocate(rspec);
+	__ lui(T7, Assembler::split_high((int)o));
+	__ addiu(T7, T7, Assembler::split_low((int)o));
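+	// T7 now holds a NULL oop placeholder; it and the -1 jump target below are
+	// patched to the real methodOop and entry point when the static call is resolved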
+
+	// must be set to -1 at code generation time
+	// assert(!os::is_MP() || ((__ offset() + 1) % BytesPerWord) == 0, "must be aligned on MP");
+	//__ jmp((address)-1, relocInfo::runtime_call_type);
+        //jerome_for_debug 
+        __ lui(AT, Assembler::split_high((int)-1));
+	__ addiu(AT, AT, Assembler::split_low((int)-1));
+   	__ jr(AT);
+	__ delayed()->nop();
+	assert(__ offset() - start <= call_stub_size, "stub too big");
+	__ end_a_stub();
+
+
+}
+
+
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info, bool unwind) {
+	assert(exceptionOop->as_register()== V0, "must match");
+	assert(unwind || exceptionPC->as_register()== V1, "must match");
+
+	// exception object is not added to oop map by LinearScan
+	// (LinearScan assumes that no oops are in fixed registers)
+
+	info->add_register_oop(exceptionOop);
+	if (!unwind) {
+		// get current pc information
+		// pc is only needed if the method has an exception handler, the unwind code does not need it. 
+		int pc_for_athrow  = (int)__ pc();
+		int pc_for_athrow_offset = __ offset();
+		Register epc = exceptionPC->as_register();
+		//__ nop();
+		// pc_for_athrow can not point to itself (relocInfo restriction), no need now
+		__ relocate(relocInfo::internal_pc_type);
+		__ lui(epc, Assembler::split_high(pc_for_athrow));
+		__ addiu(epc, epc, Assembler::split_low(pc_for_athrow));
+		add_call_info(pc_for_athrow_offset, info); // for exception handler
+		__ verify_not_null_oop(V0);
+		// search an exception handler (eax: exception oop, edx: throwing pc)
+		if (compilation()->has_fpu_code()) {
+			__ call(Runtime1::entry_for(Runtime1::handle_exception_id), 
+				relocInfo::runtime_call_type);
+		} else {
+			__ call(Runtime1::entry_for(Runtime1::handle_exception_nofpu_id), 
+				relocInfo::runtime_call_type);
+		}
+	} else {
+		__ call(Runtime1::entry_for(Runtime1::unwind_exception_id), 
+				relocInfo::runtime_call_type);
+	}
+
+	// fill the call's branch delay slot
+	__ delayed()->nop();
+}
+
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  // optimized version for linear scan:
+  // * the count is already in SHIFT_count (guaranteed by LinearScan)
+  // * left and dest must be equal
+  // * tmp must be unused
+  // for MIPS the count could live in any register, but the LinearScan contract is kept
+  assert(count->as_register() == SHIFT_count, "count must be in SHIFT_count");
+  assert(left == dest, "left and dest must be equal");
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
+
+  if (left->is_single_cpu()) {
+    /*Register value = left->as_register();
+    assert(value != SHIFT_count, "left cannot be ECX");
+
+    switch (code) {
+      case lir_shl:  __ shll(value); break;
+      case lir_shr:  __ sarl(value); break;
+      case lir_ushr: __ shrl(value); break;
+      default: ShouldNotReachHere();
+    }
+ */ 
+    Register value_reg = left->as_register();
+    Register count_reg = count->as_register();
+    Register dest_reg = dest->as_register();
+    assert_different_registers(count_reg, value_reg);
+    
+		switch (code) {
+      case lir_shl:  __ sllv(dest_reg, value_reg, count_reg); break;
+      case lir_shr:  __ srav(dest_reg, value_reg, count_reg); break;
+      case lir_ushr: __ srlv(dest_reg, value_reg, count_reg); break;
+      default: ShouldNotReachHere();
+    }
+  
+  } else if (left->is_double_cpu()) {
+  /*  Register lo = left->as_register_lo();
+    Register hi = left->as_register_hi();
+    assert(lo != SHIFT_count && hi != SHIFT_count, "left cannot be ECX");
+
+    switch (code) {
+      case lir_shl:  __ lshl(hi, lo);        break;
+      case lir_shr:  __ lshr(hi, lo, true);  break;
+      case lir_ushr: __ lshr(hi, lo, false); break;
+      default: ShouldNotReachHere();
+   
+     */
+    Register creg = count->as_register();
+    Register lo = left->as_register_lo();
+    Register hi = left->as_register_hi();
+    Register dlo = dest->as_register_lo(); 
+    Register dhi = dest->as_register_hi(); 
+		 
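+		// Java long shifts use only the low six bits of the count; note that the
+		// andi masks creg in place, clobbering the incoming count register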
+		__ andi(creg, creg, 0x3f);
+	switch (code) {
+		case lir_shl:
+			{
+				Label normal, done, notZero;
+
+				//count=0
+				__ bne(creg, ZERO, notZero);
+				__ delayed()->nop();
+				__ move(dlo, lo);
+				__ b(done);
+				__ delayed();
+				__ move(dhi, hi);
+
+				//count>=32
+				__ bind(notZero);
+				__ sltiu(AT, creg, BitsPerWord);
+				__ bne(AT, ZERO, normal);
+				__ delayed();
+				__ addiu(AT, creg, (-1) * BitsPerWord);
+				__ sllv(dhi, lo, AT);
+				__ b(done);
+				__ delayed();
+				__ move(dlo, ZERO);
+
+				//count<32
+				__ bind(normal);
+				__ sllv(dhi, hi, creg);
+				__ move(AT, BitsPerWord);
+				__ sub(AT, AT, creg);
+				__ srlv(AT, lo, AT);
+				__ orr(dhi, dhi, AT);
+				__ sllv(dlo, lo, creg);
+				__ bind(done);						
+			}	
+			break;
+		case lir_shr:
+			{
+				Label normal, done, notZero;
+				
+				//count=0
+				__ bne(creg, ZERO, notZero);
+				__ delayed()->nop();
+				__ move(dhi, hi);
+				__ b(done);
+				__ delayed();
+				__ move(dlo, lo);
+
+				//count>=32
+				__ bind(notZero);
+				__ sltiu(AT, creg, BitsPerWord);
+				__ bne(AT, ZERO, normal);
+				__ delayed();
+				__ addiu(AT, creg, (-1) * BitsPerWord);
+				__ srav(dlo, hi, AT);
+				__ b(done);
+				__ delayed();
+				__ sra(dhi, hi, BitsPerWord - 1);
+
+				//count<32
+				__ bind(normal);
+				__ srlv(dlo, lo, creg);
+				__ move(AT, BitsPerWord);
+				__ sub(AT, AT, creg);
+				__ sllv(AT, hi, AT);
+				__ orr(dlo, dlo, AT);
+				__ srav(dhi, hi, creg);
+				__ bind(done);
+			}		
+			break;
+		case lir_ushr:
+			{
+				Label normal, done, notZero;
+
+				//count=zero
+				__ bne(creg, ZERO, notZero);
+				__ delayed()->nop();
+				__ move(dhi, hi);
+				__ b(done);
+				__ delayed();
+				__ move(dlo, lo);
+
+				//count>=32
+				__ bind(notZero);
+				__ sltiu(AT, creg, BitsPerWord);
+				__ bne(AT, ZERO, normal);
+				__ delayed();
+				__ addi(AT, creg, (-1) * BitsPerWord);
+				__ srlv(dlo, hi, AT);
+				__ b(done);
+				__ delayed();
+				__ move(dhi, ZERO);
+
+				//count<32
+				__ bind(normal);
+				__ srlv(dlo, lo, creg);
+				__ move(AT, BitsPerWord);
+				__ sub(AT, AT, creg);
+				__ sllv(AT, hi, AT);
+				__ orr(dlo, dlo, AT);
+				__ srlv(dhi, hi, creg);
+				__ bind(done);
+			}
+			break;
+		default: ShouldNotReachHere();	 
+	}
+  } else {
+    ShouldNotReachHere();
+  }
+
+}
+
+// 64-bit shifts by a constant count are handled here as well
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint  count, LIR_Opr dest) {
+
+	if (left->is_single_cpu()) {
+		Register value_reg = left->as_register();
+		Register dest_reg = dest->as_register();
+		count = count & 0x1F; // Java spec
+
+		switch (code) {
+			case lir_shl:  __ sll(dest_reg, value_reg, count); break;
+			case lir_shr:  __ sra(dest_reg, value_reg, count); break;
+			case lir_ushr: __ srl(dest_reg, value_reg, count); break;
+			default: ShouldNotReachHere();
+		}
+
+	} else if (dest->is_double_cpu()) {
+		Register valuelo = left->as_register_lo();
+		Register valuehi = left->as_register_hi();
+		Register destlo = dest->as_register_lo();
+		Register desthi = dest->as_register_hi();
+		assert_different_registers(destlo, valuehi, desthi);
+		count = count & 0x3f;
+
+		switch (code) {
+			case lir_shl:
+				if (count==0) {
+					__ move(destlo, valuelo);
+					__ move(desthi, valuehi);
+				} else if (count>=32) {
+					__ sll(desthi, valuelo, count-32);
+					__ move(destlo, ZERO);
+				} else {
+					__ srl(AT, valuelo, 32 - count);
+					__ sll(destlo, valuelo, count);
+					__ sll(desthi, valuehi, count);
+					__ orr(desthi, desthi, AT);	
+				}
+				break;
+
+			case lir_shr:
+				if (count==0) {
+					__ move(destlo, valuelo);
+					__ move(desthi, valuehi);
+				} else if (count>=32) {
+					__ sra(destlo, valuehi, count-32);
+					__ sra(desthi, valuehi, 31);
+				} else {
+					__ sll(AT, valuehi, 32 - count);
+					__ sra(desthi, valuehi, count);
+					__ srl(destlo, valuelo, count);
+					__ orr(destlo, destlo, AT);	
+				}
+				break;
+
+			case lir_ushr:
+				if (count==0) {
+					__ move(destlo, valuelo);
+					__ move(desthi, valuehi);
+				} else if (count>=32) {
+					__ srl(destlo, valuehi, count-32);	// logical shift for ushr
+					__ move(desthi, ZERO);
+				} else {
+					__ sll(AT, valuehi, 32 - count);
+					__ srl(desthi, valuehi, count);
+					__ srl(destlo, valuelo, count);
+					__ orr(destlo, destlo, AT);	
+				}
+				break;
+
+			default: ShouldNotReachHere();
+		}	
+	} else {
+		ShouldNotReachHere();
+	}
+}
+
+//void LIR_Assembler::push_parameter(Register r, int offset_from_sp_in_words) {
+void LIR_Assembler::store_parameter(Register r, int offset_from_esp_in_words) {
+	assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
+	int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
+	assert(offset_from_esp_in_words < frame_map()->reserved_argument_area_size(), "invalid offset");
+	__ sw (r, SP, offset_from_sp_in_bytes);
+}
+
+
+void LIR_Assembler::store_parameter(jint c,     int offset_from_esp_in_words) {
+	assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
+	int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
+	assert(offset_from_esp_in_words < frame_map()->reserved_argument_area_size(), "invalid offset");
+	__ move(AT, c);
+	__ sw(AT, SP, offset_from_sp_in_bytes);
+}
+
+void LIR_Assembler::store_parameter(jobject o,  int offset_from_esp_in_words) {
+   assert(offset_from_esp_in_words >= 0, "invalid offset from esp");
+   int offset_from_sp_in_bytes = offset_from_esp_in_words * BytesPerWord;
+   assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  // __ movl (Address(esp, offset_from_esp_in_bytes), o);
+   //__ move(AT, o);
+   int oop_index = __ oop_recorder()->find_index(o);
+	RelocationHolder rspec = oop_Relocation::spec(oop_index);
+	__ relocate(rspec);
+	__ lui(AT, Assembler::split_high((int)o));
+	__ addiu(AT, AT, Assembler::split_low((int)o));
+
+   __ sw(AT, SP, offset_from_sp_in_bytes);
+
+}
+
+
+// This code replaces a call to arraycopy; no exception may be thrown in this
+// code, exceptions must be thrown in the System.arraycopy activation frame;
+// we could save some checks if this were not the case
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+
+
+	ciArrayKlass* default_type = op->expected_type();
+	Register src = op->src()->as_register();
+	Register dst = op->dst()->as_register();
+	Register src_pos = op->src_pos()->as_register();
+	Register dst_pos = op->dst_pos()->as_register();
+	Register length  = op->length()->as_register();
+	Register tmp = T8;
+#ifndef OPT_THREAD
+	Register java_thread = T8;
+#else
+	Register java_thread = TREG;
+#endif
+	CodeStub* stub = op->stub();
+
+	int flags = op->flags();
+	BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+	if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+	// if we don't know anything or it's an object array, just go through the generic arraycopy
+	if (default_type == NULL) {
+		Label done;
+// save outgoing arguments on stack in case call to System.arraycopy is needed
+// HACK ALERT. This code used to push the parameters in a hardwired fashion
+// for interpreter calling conventions. Now we have to do it in new style conventions.
+// For the moment until C1 gets the new register allocator I just force all the
+// args to the right place (except the register args) and then on the back side
+// reload the register args properly if we go slow path. Yuck
+		
+// this is saved in the caller's reserved argument area
+	// FIXME: this may change something in the stack layout
+		  // These are proper for the calling convention
+		//store_parameter(length, 2);
+		//store_parameter(dst_pos, 1);
+		//store_parameter(dst, 0);
+
+	 // these are just temporary placements until we need to reload
+		//store_parameter(src_pos, 3);
+		//store_parameter(src, 4);
+		assert(src == T0 && src_pos == A0, "mismatch in calling convention");
+	// pass arguments: may push since this is not a safepoint; SP must be fixed at each safepoint
+
+		__ push(src);
+		__ push(dst);
+		__ push(src_pos);
+		__ push(dst_pos);
+		__ push(length);
+
+
+		// save SP and align
+#ifndef OPT_THREAD
+		__ get_thread(java_thread);
+#endif
+		__ sw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
+		__ addi(SP, SP, (-5) * wordSize);
+		__ move(AT, -8);
+		__ andr(SP, SP, AT);
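+		// SP has been lowered by five words and rounded down to an 8-byte boundary;
+		// the fifth argument (length) is passed on the stack at 4*wordSize(SP),
+		// with the first four arguments placed in A0..A3 below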
+		// push argument
+		__ sw(length, SP, 4 * wordSize);
+		/*if (dst_pos != A3)*/ __ move(A3, dst_pos);
+		/*if (dst != A2)*/ __ move(A2, dst);
+		/*if (src_pos != A1)*/ __ move(A1, src_pos);
+		/*if (src != A0)*/ __ move(A0, src);
+							// make call
+		address entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
+		__ call(entry, relocInfo::runtime_call_type);	
+		__ delayed()->nop();
+		// restore SP
+#ifndef OPT_THREAD
+		__ get_thread(java_thread);
+#endif
+		__ lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
+
+//		__ beq(V0, ZERO, done);
+		__ beq(V0, ZERO, *stub->continuation());
+		__ delayed()->nop();
+     		__ super_pop(length); 
+     		__ super_pop(dst_pos);
+     		__ super_pop(src_pos);
+     		__ super_pop(dst);
+     		__ super_pop(src);
+
+
+     		__ b(*stub->entry());
+     		__ delayed()->nop(); 
+     		__ bind(*stub->continuation());
+     		return;
+	}
+	assert(default_type != NULL 
+		&& default_type->is_array_klass() 
+		&& default_type->is_loaded(), 
+		"must be true at this point");
+
+	int elem_size = type2aelembytes(basic_type);
+	int shift_amount;
+	switch (elem_size) {
+		case 1 :shift_amount = 0; break;
+		case 2 :shift_amount = 1; break;
+		case 4 :shift_amount = 2; break;
+		case 8 :shift_amount = 3; break;
+		default:ShouldNotReachHere();
+	}
+
+	Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
+	Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
+	Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
+	Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
+
+	// test for NULL
+	if (flags & LIR_OpArrayCopy::src_null_check) {
+		__ beq(src, ZERO, *stub->entry());
+		__ delayed()->nop();
+	}
+	if (flags & LIR_OpArrayCopy::dst_null_check) {
+		__ beq(dst, ZERO, *stub->entry());
+		__ delayed()->nop();
+	}
+
+	// check if negative
+	if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+		__ bltz(src_pos, *stub->entry());
+		__ delayed()->nop();
+	}
+	if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+		__ bltz(dst_pos, *stub->entry());
+		__ delayed()->nop();
+	}
+	if (flags & LIR_OpArrayCopy::length_positive_check) {
+		__ bltz(length, *stub->entry());
+		__ delayed()->nop();
+	}
+
+	if (flags & LIR_OpArrayCopy::src_range_check) {
+		__ add(AT, src_pos, length);
+		__ lw(tmp, src_length_addr);
+		__ sltu(AT, tmp, AT);
+		__ bne(AT, ZERO, *stub->entry());
+		__ delayed()->nop();
+	}
+	if (flags & LIR_OpArrayCopy::dst_range_check) {
+		__ add(AT, dst_pos, length);
+		__ lw(tmp, dst_length_addr);
+		__ sltu(AT, tmp, AT);
+		__ bne(AT, ZERO, *stub->entry());
+		__ delayed()->nop();
+	}
+
+	if (flags & LIR_OpArrayCopy::type_check) {
+		__ lw(AT, src_klass_addr);
+		__ lw(tmp, dst_klass_addr);
+		__ bne(AT, tmp, *stub->entry());
+		__ delayed()->nop();
+	}
+
+#ifdef ASSERT
+	if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+		// Sanity check the known type with the incoming class.  For the
+		// primitive case the types must match exactly.  For the object array
+		// case, if no type check is needed then the dst type must match the
+		// expected type and the src type is some subtype which we can't check.  If
+		// a type check is needed then at this point the classes are known to be
+		// the same, but we don't know the exact type so we can't check them.
+		Label known_ok, halt;
+		jobject2reg(default_type->encoding(), AT);
+		__ lw(tmp, dst_klass_addr);
+		if (basic_type != T_OBJECT) {
+			__ bne(AT, tmp, halt);
+			__ delayed()->nop();
+			__ lw(tmp, src_klass_addr);			
+		}
+		__ beq(AT, tmp, known_ok);
+		__ delayed()->nop();
+		__ bind(halt);
+		__ stop("incorrect type information in arraycopy");
+		__ bind(known_ok);
+	}
+#endif
+	__ push(src);
+	__ push(dst);
+	__ push(src_pos);
+	__ push(dst_pos);
+	__ push(length);
+
+
+	assert_different_registers(A0, A1, length);
+	__ move(AT, dst_pos);
+	if (shift_amount > 0 && basic_type != T_OBJECT) {
+		__ sll(A2, length, shift_amount);
+	} else {
+		if (length!=A2)
+			__ move(A2, length);
+	}
+        __ move(A3, src_pos );	
+        assert_different_registers(A0, dst_pos, dst);
+	__ sll(AT, AT, shift_amount);
+	__ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(basic_type));
+	__ add(A1, dst, AT);
+
+	__ sll(AT, A3, shift_amount);
+	__ addi(AT, AT, arrayOopDesc::base_offset_in_bytes(basic_type));
+	__ add(A0, src, AT);
+
+
+
+	if (basic_type == T_OBJECT) {
+		__ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::oop_arraycopy), 3);
+	} else {
+		__ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::primitive_arraycopy), 3);
+	}
+     	__ super_pop(length); 
+     	__ super_pop(dst_pos);
+     	__ super_pop(src_pos);
+     	__ super_pop(dst);
+     	__ super_pop(src);
+
+	__ bind(*stub->continuation());
+}
+
+
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+	Register obj = op->obj_opr()->as_register();  // may not be an oop
+	Register hdr = op->hdr_opr()->as_register();
+	Register lock = op->lock_opr()->as_register();
+	if (!UseFastLocking) {
+		__ b(*op->stub()->entry());
+	} else if (op->code() == lir_lock) {
+		Register scratch = noreg;
+		if (UseBiasedLocking) {
+			scratch = op->scratch_opr()->as_register();
+		}
+		assert(BasicLock::displaced_header_offset_in_bytes() == 0, 
+			"lock_reg must point to the displaced header");
+		// add debug info for NullPointerException only if one is possible
+		int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
+		if (op->info() != NULL) {
+			//add_debug_info_for_null_check_here(op->info());
+			add_debug_info_for_null_check(null_check_offset,op->info());
+		}
+		// done
+	} else if (op->code() == lir_unlock) {
+		assert(BasicLock::displaced_header_offset_in_bytes() == 0, 
+			"lock_reg must point to the displaced header");
+		__ unlock_object(hdr, obj, lock, *op->stub()->entry());
+	} else {
+		Unimplemented();
+	}
+	__ bind(*op->stub()->continuation());
+}
+
+
+
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+	ciMethod* method = op->profiled_method();
+	int bci          = op->profiled_bci();
+
+	// Update counter for all call types
+	ciMethodData* md = method->method_data();
+	if (md == NULL) {
+		bailout("out of memory building methodDataOop");
+		return;
+	}
+	ciProfileData* data = md->bci_to_data(bci);
+	assert(data->is_CounterData(), "need CounterData for calls");
+	assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+	Register mdo  = op->mdo()->as_register();
+
+	int oop_index = __ oop_recorder()->find_index(md->encoding());
+	RelocationHolder rspec = oop_Relocation::spec(oop_index);
+	__ relocate(rspec);
+	__ lui(mdo, Assembler::split_high((int)md->encoding()));
+	__ addiu(mdo, mdo, Assembler::split_low((int)md->encoding()));
+
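+	// mdo now holds the methodDataOop; bump the per-call counter for this bci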
+	Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+	__ lw(AT,counter_addr); 
+	__ addi(AT,AT, DataLayout::counter_increment); 
+	__ sw(AT,counter_addr); 
+
+	Bytecodes::Code bc = method->java_code_at_bci(bci);
+	// Perform additional virtual call profiling for invokevirtual and
+	// invokeinterface bytecodes
+	if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+			Tier1ProfileVirtualCalls) {
+		assert(op->recv()->is_single_cpu(), "recv must be allocated");
+		Register recv = op->recv()->as_register();
+		assert_different_registers(mdo, recv);
+		assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+		ciKlass* known_klass = op->known_holder();
+		if (Tier1OptimizeVirtualCallProfiling && known_klass != NULL) {
+			// We know the type that will be seen at this call site; we can
+			// statically update the methodDataOop rather than needing to do
+			// dynamic tests on the receiver type
+
+			// NOTE: we should probably put a lock around this search to
+			// avoid collisions by concurrent compilations
+			ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+			uint i;
+			for (i = 0; i < VirtualCallData::row_limit(); i++) {
+				ciKlass* receiver = vc_data->receiver(i);
+				if (known_klass->equals(receiver)) {
+					Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+					__ lw(AT,data_addr); 
+					__ addi(AT,AT,DataLayout::counter_increment);
+					__ sw(AT,data_addr); 
+					return;
+				}
+			}
+
+			// Receiver type not found in profile data; select an empty slot
+
+			// Note that this is less efficient than it should be because it
+			// always does a write to the receiver part of the
+			// VirtualCallData rather than just the first time
+			for (i = 0; i < VirtualCallData::row_limit(); i++) {
+				ciKlass* receiver = vc_data->receiver(i);
+				if (receiver == NULL) {
+					Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+					int oop_index = __ oop_recorder()->find_index(known_klass->encoding());
+					RelocationHolder rspec = oop_Relocation::spec(oop_index);
+					__ relocate(rspec);
+					__ lui(AT, Assembler::split_high((int)known_klass->encoding()));
+					__ addiu(AT, AT, Assembler::split_low((int)known_klass->encoding()));
+					__ sw(AT,recv_addr); 
+					Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+					__ lw(AT, data_addr); 
+					__ addi(AT,AT,DataLayout::counter_increment);
+					__ sw(AT,data_addr); 
+					return;
+				}
+			}
+		} else {
+			__ lw(recv, Address(recv, oopDesc::klass_offset_in_bytes()));
+			Label update_done;
+			uint i;
+			for (i = 0; i < VirtualCallData::row_limit(); i++) {
+				Label next_test;
+				// See if the receiver is receiver[n].
+				__ lw(AT,Address(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)))); 
+				__ bne(recv,AT,next_test);	
+				__ delayed()->nop();	
+				Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+				__ lw(AT,data_addr); 
+				__ addi(AT,AT,DataLayout::counter_increment);
+				__ sw(AT,data_addr); 
+				__ b(update_done);
+				__ delayed()->nop(); 
+				__ bind(next_test);
+			}
+
+			// Didn't find receiver; find next empty slot and fill it in
+			for (i = 0; i < VirtualCallData::row_limit(); i++) {
+				Label next_test;
+				Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+				__ lw(AT,recv_addr);   
+				__ bne(AT,ZERO,next_test); 
+				__ delayed()->nop();
+				__ sw(recv,recv_addr); 
+				__ move(AT,DataLayout::counter_increment); 
+				__ sw(AT,Address(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))));	
+				if (i < (VirtualCallData::row_limit() - 1)) {
+					__ b(update_done);
+					__ delayed()->nop(); 
+				}
+				__ bind(next_test);
+			}
+
+			__ bind(update_done);
+		}
+	}
+}
+
+void LIR_Assembler::emit_delay(LIR_OpDelay*) {
+	  Unimplemented();
+}
+
+
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
+        __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
+}
+
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+	if (left->is_single_cpu()) {
+		__ subu(dest->as_register(), ZERO, left->as_register());
+	} else if (left->is_double_cpu()) {
+		Register lo = left->as_register_lo();
+		Register hi = left->as_register_hi();
+		Register dlo = dest->as_register_lo();
+		Register dhi = dest->as_register_hi();
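+		// 64-bit two's-complement negation: low = ~lo + 1, and the carry
+		// (set when the low word wraps to 0) is propagated into high = ~hi + carry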
+		__ nor(dlo, ZERO, lo);
+		__ addiu(dlo, dlo, 1);
+		__ sltiu(AT, dlo, 1);
+		__ nor(dhi, ZERO, hi);
+		__ addu(dhi, dhi, AT);
+	} else if (left->is_single_fpu()) {
+		//for mips , does it required ?      
+		__ neg_s(dest->as_float_reg(), left->as_float_reg());
+	} else if (left->is_double_fpu()) {
+		//for mips , does it required ?      
+		__ neg_d(dest->as_double_reg(), left->as_double_reg());
+	}else {
+		ShouldNotReachHere();
+	}
+}
+
+
+void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) {
+	assert(addr->is_address() && dest->is_register(), "check");
+	Register reg = dest->as_register();
+	__ lea(dest->as_register(), as_Address(addr->as_address_ptr()));
+}
+
+
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+	if (o == NULL) { 
+		// This seems wrong as we do not emit relocInfo 
+		// for classes that are not loaded yet, i.e., they will be
+		// never GC'd
+		NEEDS_CLEANUP
+		int oop_index = __ oop_recorder()->allocate_index(o);
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(reg, Assembler::split_high((int)o));
+		__ addiu(reg, reg, Assembler::split_low((int)o));
+		//	__ move(reg, ZERO);
+	} else {
+		int oop_index = __ oop_recorder()->find_index(o);
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(reg, Assembler::split_high((int)o));
+		__ addiu(reg, reg, Assembler::split_low((int)o));
+	}
+}
+
+void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+ 	assert(!tmp->is_valid(), "don't need temporary");
+  	__ call(dest, relocInfo::runtime_call_type);
+  	__ delayed()->nop(); 
+  	if (info != NULL) {
+	  	add_call_info_here(info);
+  	}
+}
+
+/*  by yyq 7/22/2009
+ *  It is not clear whether the register allocator will allocate a long or double
+ *  into two consecutive registers. If it does, the lw/lwc1 pairs below should be
+ *  replaced with single 64-bit loads.
+ */
+
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+	assert(type == T_LONG, "only for volatile long fields");
+	if (info != NULL) {
+		add_debug_info_for_null_check_here(info);
+	}
+ 
+	if(src->is_register() && dest->is_address()) {
+		if(src->is_double_cpu()) {
+    			__ sw(src->as_register_lo(), as_Address(dest->as_address_ptr()));
+    			__ sw(src->as_register_hi(), as_Address(dest->as_address_ptr()).base(), 
+				as_Address(dest->as_address_ptr()).disp() +4);
+		} else if (src->is_double_fpu()) {
+    			__ swc1(src->as_fpu_lo(), as_Address(dest->as_address_ptr()));
+    			__ swc1(src->as_fpu_hi(), as_Address(dest->as_address_ptr()).base(), 
+				as_Address(dest->as_address_ptr()).disp() +4);
+			
+		} else {
+      			ShouldNotReachHere();
+		}
+	} else if (src->is_address() && dest->is_register()){
+		if(dest->is_double_cpu()) {
+    			__ lw(dest->as_register_lo(), as_Address(src->as_address_ptr()));
+    			__ lw(dest->as_register_hi(), as_Address(src->as_address_ptr()).base(), 
+				as_Address(src->as_address_ptr()).disp() +4);
+		} else if (dest->is_double_fpu()) {
+    			__ lwc1(dest->as_fpu_lo(), as_Address(src->as_address_ptr()));
+    			__ lwc1(dest->as_fpu_hi(), as_Address(src->as_address_ptr()).base(), 
+				as_Address(src->as_address_ptr()).disp() +4);
+		} else {
+      			ShouldNotReachHere();
+		}
+		
+	} else {
+      		ShouldNotReachHere();
+	}
+}
+
+
+void LIR_Assembler::membar() {
+	__ sync();
+}
+
+void LIR_Assembler::membar_acquire() {
+	__ sync();
+}
+
+void LIR_Assembler::membar_release() {
+	__ sync();
+}
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+	assert(result_reg->is_register(), "check");
+#ifndef OPT_THREAD
+	__ get_thread(result_reg->as_register());
+#else
+	__ move(result_reg->as_register(), TREG);
+#endif
+}
+
+void LIR_Assembler::peephole(LIR_List*) {
+	// do nothing for now
+}
+
+#undef __ 
+
+void LIR_Assembler::align_backward_branch_target() {
+}
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_LIRAssembler_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+ private:
+
+  Address::ScaleFactor array_element_size(BasicType type) const;
+
+  void monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register new_hdr, int monitor_no, Register exception);
+
+  void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack);
+
+  // helper functions which checks for overflow and sets bailout if it
+  // occurs.  Always returns a valid embeddable pointer but in the
+  // bailout case the pointer won't be to unique storage.
+  address float_constant(float f);
+  address double_constant(double d);
+
+  bool is_literal_address(LIR_Address* addr);
+
+  // When we need to use something other than rscratch1 use this
+  // method.
+  Address as_Address(LIR_Address* addr, Register tmp);
+
+
+public:
+
+  void store_parameter(Register r, int offset_from_esp_in_words);
+  void store_parameter(jint c,     int offset_from_esp_in_words);
+  void store_parameter(jobject c,  int offset_from_esp_in_words);
+
+  enum { call_stub_size = NOT_LP64(24) LP64_ONLY(28),
+         exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
+         deopt_handler_size = NOT_LP64(12) LP64_ONLY(17)
+       };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_LIRGenerator_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,1278 @@
+/*
+ * Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_c1_LIRGenerator_mips.cpp.incl"
+
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+// Item will be loaded into a byte register; Intel only
+void LIRItem::load_byte_item() {
+  load_item();
+  LIR_Opr res = result();
+
+  if (!res->is_virtual() || !_gen->is_vreg_flag_set(res, LIRGenerator::byte_reg)) {
+    // make sure that it is a byte register
+    assert(!value()->type()->is_float() && !value()->type()->is_double(),
+           "can't load floats in byte register");
+    LIR_Opr reg = _gen->rlock_byte(T_BYTE);
+    __ move(res, reg);
+
+    _result = reg;
+  }
+}
+
+
+void LIRItem::load_nonconstant() {
+  LIR_Opr r = value()->operand();
+  if (r->is_constant()) {
+    _result = r;
+  } else {
+    load_item();
+  }
+}
+
+//--------------------------------------------------------------
+//               LIRGenerator
+//--------------------------------------------------------------
+LIR_Opr LIRGenerator::exceptionOopOpr()              { return FrameMap::_v0_oop_opr;     }
+LIR_Opr LIRGenerator::exceptionPcOpr()               { return FrameMap::_v1_opr;     }
+LIR_Opr LIRGenerator::divInOpr()                     { return FrameMap::_a0_opr; }//FIXME 
+LIR_Opr LIRGenerator::divOutOpr()                    { return FrameMap::_f0_opr; } //FIXME 
+LIR_Opr LIRGenerator::remOutOpr()                    { return FrameMap::_f0_opr; } //FIXME 
+LIR_Opr LIRGenerator::shiftCountOpr()                { return FrameMap::_t3_opr; } //
+LIR_Opr LIRGenerator::syncTempOpr()                  { return FrameMap::_t2_opr;     }
+LIR_Opr LIRGenerator::getThreadTemp()                { return  LIR_OprFact::illegalOpr;  } //
+
+
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
+	LIR_Opr opr;
+	switch (type->tag()) {
+		case intTag:     
+			{
+				opr = FrameMap::_v0_opr; 
+				break;
+			}
+		case objectTag:  
+			{
+				opr = FrameMap::_v0_oop_opr;      
+				break;
+			}
+		case longTag:    
+			{
+				opr = FrameMap::_v0_v1_long_opr;
+				break;
+			} 
+		case floatTag:   
+			{
+				opr = FrameMap::_f0_float_opr;  
+				break;
+			}
+		case doubleTag:  {
+					 opr = FrameMap::_d0_double_opr;  
+					 break;
+				 }
+		case addressTag:
+		default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
+	}
+
+	assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+	return opr;
+}
+
+LIR_Opr LIRGenerator::rlock_callee_saved(BasicType type) {
+	LIR_Opr reg = new_register(type);
+	set_vreg_flag(reg, callee_saved);
+	return reg;
+}
+
+
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+	return new_register(T_INT);
+}
+
+/*
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  LIR_Opr reg = new_register(T_INT);
+  set_vreg_flag(reg, LIRGenerator::byte_reg);
+  return reg;
+}
+*/
+
+//--------- loading items into registers --------------------------------
+
+
+// i486 instructions can inline constants
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  if (type == T_SHORT || type == T_CHAR) {
+    // there is no immediate move of word values in assembler_i486.?pp
+    return false;
+  }
+  Constant* c = v->as_Constant();
+  if (c && c->state() == NULL) {
+    // constants of any type can be stored directly, except for
+    // unloaded object constants.
+    return true;
+  }
+  return false;
+}
+
+
+bool LIRGenerator::can_inline_as_constant(Value v) const {
+	if (v->type()->is_constant() && v->type()->as_IntConstant() != NULL) {
+		return Assembler::is_simm16(v->type()->as_IntConstant()->value());
+	} else {
+	  return false;
+	}
+}
+
+
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+	if (c->type() == T_INT && c->as_constant() != NULL) {
+		return Assembler::is_simm16(c->as_jint());
+	} else {
+	  return false;
+	}
+}
+
+
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return LIR_OprFact::illegalOpr;
+}
+
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+	/*  assert(base->is_register(), "must be");
+	    if (index->is_constant()) {
+	    return new LIR_Address(base,
+	    (index->as_constant_ptr()->as_jint() << shift) + disp,
+	    type);
+	    } else {
+	    return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type);
+	    }
+	    */
+	assert(base->is_register(), "must be");
+
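+	// MIPS has no base+index*scale addressing mode, so a scaled index or a
+	// displacement that does not fit in a signed 16-bit immediate is folded
+	// into a temporary register first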
+	if (index->is_constant()) {
+		disp += index->as_constant_ptr()->as_jint() << shift;
+		if (Assembler::is_simm16(disp)) {
+			return new LIR_Address(base, disp, type);
+		} else if (disp != 0) {
+			LIR_Opr tmp = new_register(T_INT);
+			__ move(LIR_OprFact::intConst((int)disp), tmp);
+			__ add(tmp, base, tmp);
+			return new LIR_Address(tmp, 0, type);
+		} else {
+			return new LIR_Address(base, 0, type);
+		}
+	}
+	else if( index->is_register()){
+		
+		LIR_Opr tmpa = new_register(T_INT);
+		__ move(index, tmpa);
+		__ shift_left(tmpa, shift, tmpa);
+		__ add(tmpa,base, tmpa);	
+		if (Assembler::is_simm16(disp)) {
+			return new LIR_Address(tmpa, disp, type);
+		} else {
+		
+		if(disp!=0){	
+			LIR_Opr tmp = new_register(T_INT);
+
+			__ move(LIR_OprFact::intConst((int)disp), tmp);
+			__ add(tmp, tmpa, tmp);
+			return new LIR_Address(tmp, 0, type);
+		}	
+		else 
+			return new LIR_Address(tmpa, 0, type);
+		}
+
+	}
+	else {
+		
+		if (Assembler::is_simm16(disp)) {
+			return new LIR_Address(base,disp, type);
+		} else {
+		if(disp!=0){	
+			LIR_Opr tmp = new_register(T_INT);
+
+			__ move(LIR_OprFact::intConst((int)disp), tmp);
+			__ add(tmp, base, tmp);
+			return new LIR_Address(tmp, 0, type);
+		}	
+	        else	
+			return new LIR_Address(base, 0, type);
+		}
+
+
+
+
+	}
+}
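+
+// A rough sketch (illustrative only; the actual instructions are chosen later
+// by the assembler) of what the large-displacement path above lowers to:
+//
+//   lui   tmp, disp[31:16]     # materialize the 32-bit displacement
+//   ori   tmp, tmp, disp[15:0]
+//   addu  tmp, tmp, base
+//   lw    dst, 0(tmp)          # the final access uses a zero offset
+//
+// whereas a displacement that fits in a signed 16-bit immediate is folded
+// directly into the offset field of the load/store.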
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type, bool needs_card_mark) {
+	int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+
+	LIR_Address* addr;
+	if (index_opr->is_constant()) {
+		int elem_size = _type2aelembytes[type];
+		addr = new LIR_Address(array_opr,
+				offset_in_bytes + index_opr->as_jint() * elem_size, type);
+	} else if (index_opr->is_register()) {
+		LIR_Opr tmp = new_register(T_INT);
+		__ move(index_opr, tmp);
+		__ shift_left(tmp, LIR_Address::scale(type), tmp);
+		__ add(tmp, array_opr, tmp);
+		addr = new LIR_Address(tmp, offset_in_bytes, type);
+		// addr = new LIR_Address(array_opr, index_opr,
+		//                        LIR_Address::scale(type), offset_in_bytes, type);
+	} else {
+		addr = new LIR_Address(array_opr, offset_in_bytes, type);
+	}
+
+	if (needs_card_mark) {
+		// This store will need a precise card mark, so go ahead and
+		// compute the full address instead of computing it once for the
+		// store and again for the card mark.
+		LIR_Opr tmp = new_register(T_INT);
+		__ leal(LIR_OprFact::address(addr), tmp);
+		return new LIR_Address(tmp, 0, type);
+	} else {
+		return addr;
+	}
+}
+
+void LIRGenerator::increment_counter(address counter, int step) {
+	LIR_Opr temp = new_register(T_INT);
+	LIR_Opr pointer = new_register(T_INT);
+	__ move(LIR_OprFact::intConst((int)counter), pointer);
+	LIR_Opr addr = (LIR_Opr)new LIR_Address(pointer, 0, T_INT);
+	LIR_Opr c = LIR_OprFact::intConst((int)step);
+	__ add(addr, c, addr);
+}
+
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  Unimplemented();
+}
+
+/*
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  Unimplemented();
+}
+
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info);
+}
+
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) {
+  __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info);
+}
+*/
+
+
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+  if (tmp->is_valid()) {
+    if (is_power_of_2(c + 1)) {
+      __ move(left, tmp);
+      __ shift_left(left, log2_intptr(c + 1), left);
+      __ sub(left, tmp, result);
+      return true;
+    } else if (is_power_of_2(c - 1)) {
+      __ move(left, tmp);
+      __ shift_left(left, log2_intptr(c - 1), left);
+      __ add(left, tmp, result);
+      return true;
+    }
+  }
+  return false;
+}
+
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
+	BasicType type = item->type();
+	__ store(item, new LIR_Address(FrameMap::_sp_opr, in_bytes(offset_from_sp), type));
+}
+
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+
+
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
+  assert(x->is_root(),"");
+  bool needs_range_check = true;
+  bool use_length = x->length() != NULL;
+  bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+  bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+                                         !get_jobject_constant(x->value())->is_null_object());
+
+  LIRItem array(x->array(), this);
+  LIRItem index(x->index(), this);
+  LIRItem value(x->value(), this);
+  LIRItem length(this);
+
+  array.load_item();
+  index.load_nonconstant();
+
+  if (use_length) {
+    needs_range_check = x->compute_needs_range_check();
+    if (needs_range_check) {
+      length.set_instruction(x->length());
+      length.load_item();
+    }
+  }
+  if (needs_store_check) {
+    value.load_item();
+  } else {
+    value.load_for_store(x->elt_type());
+  }
+
+  set_no_result(x);
+
+  // the CodeEmitInfo must be duplicated for each different
+  // LIR-instruction because spilling can occur anywhere between two
+  // instructions and so the debug information must be different
+  CodeEmitInfo* range_check_info = state_for(x);
+  CodeEmitInfo* null_check_info = NULL;
+  if (x->needs_null_check()) {
+    null_check_info = new CodeEmitInfo(range_check_info);
+  }
+
+  // emit array address setup early so it schedules better
+  LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+
+  if (GenerateRangeChecks && needs_range_check) {
+    if (use_length) {
+      __ branch(lir_cond_belowEqual, length.result(),index.result(),T_INT,new RangeCheckStub(range_check_info, index.result()));
+    } else {
+      array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+      // range_check also does the null check
+      null_check_info = NULL;
+    }
+  }
+
+  if (GenerateArrayStoreCheck && needs_store_check) {
+    LIR_Opr tmp1 = new_register(objectType);
+    LIR_Opr tmp2 = new_register(objectType);
+    LIR_Opr tmp3 = new_register(objectType);
+
+    CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+    __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info);
+  }
+
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
+    __ move(value.result(), array_addr, null_check_info);
+    // Seems to be a precise address
+    post_barrier(LIR_OprFact::address(array_addr), value.result());
+  } else {
+    __ move(value.result(), array_addr, null_check_info);
+  }
+}
+
+
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_root(),"");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // "lock" stores the address of the monitor stack slot, so this is not an oop
+  LIR_Opr lock = new_register(T_INT);
+  // Need a scratch register for biased locking
+  LIR_Opr scratch = LIR_OprFact::illegalOpr;
+  if (UseBiasedLocking) {
+    scratch = new_register(T_INT);
+  }
+
+  CodeEmitInfo* info_for_exception = NULL;
+  if (x->needs_null_check()) {
+    info_for_exception = state_for(x, x->lock_stack_before());
+  }
+  // this CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expect object to be unlocked)
+  CodeEmitInfo* info = state_for(x, x->state(), true);
+  monitor_enter(obj.result(), lock, syncTempOpr(), scratch,
+                        x->monitor_no(), info_for_exception, info);
+}
+
+
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_root(),"");
+
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+
+  LIR_Opr lock = new_register(T_INT);
+  LIR_Opr obj_temp = new_register(T_INT);
+  set_no_result(x);
+  monitor_exit(obj_temp, lock, syncTempOpr(), x->monitor_no());
+}
+
+
+// _ineg, _lneg, _fneg, _dneg
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+  LIRItem value(x->x(), this);
+  value.set_destroys_register();
+  value.load_item();
+  LIR_Opr reg = rlock(x);
+  __ negate(value.result(), reg);
+
+  set_result(x, round_item(reg));
+}
+
+
+
+// for  _fadd, _fmul, _fsub, _fdiv, _frem
+//      _dadd, _dmul, _dsub, _ddiv, _drem
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+	LIRItem left(x->x(), this);
+	LIRItem right(x->y(), this);
+	left.load_item();
+	right.load_item();
+	rlock_result(x);
+	arithmetic_op_fpu(x->op(), x->operand(), left.result(), right.result(), x->is_strictfp());
+}
+
+
+
+
+// for  _ladd, _lmul, _lsub, _ldiv, _lrem
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+  switch (x->op()) {
+  case Bytecodes::_lrem:
+  case Bytecodes::_lmul:
+  case Bytecodes::_ldiv: {
+
+    if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
+      LIRItem right(x->y(), this);
+      right.load_item();
+
+      CodeEmitInfo* info = state_for(x);
+      LIR_Opr item = right.result();
+      assert(item->is_register(), "must be");
+//      __ cmp(lir_cond_equal, item, LIR_OprFact::longConst(0));
+ //     __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+      __ branch(lir_cond_equal,item,LIR_OprFact::longConst(0), T_LONG, new DivByZeroStub(info));
+    }
+
+    address entry;
+    switch (x->op()) {
+    case Bytecodes::_lrem:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::lrem);
+      break; // check if dividend is 0 is done elsewhere
+    case Bytecodes::_ldiv:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv);
+      break; // check if dividend is 0 is done elsewhere
+    case Bytecodes::_lmul:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::lmul);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    // order of arguments to runtime call is reversed.
+    LIR_Opr result = call_runtime(x->y(), x->x(), entry, x->type(), NULL);
+    set_result(x, result);
+    break;
+  }
+  case Bytecodes::_ladd:
+  case Bytecodes::_lsub: {
+    LIRItem left(x->x(), this);
+    LIRItem right(x->y(), this);
+    left.load_item();
+    right.load_item();
+    rlock_result(x);
+
+    arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+    break;
+  }
+  default: ShouldNotReachHere();
+  }
+}
+
+
+
+// for: _iadd, _imul, _isub, _idiv, _irem
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+	bool is_div_rem = x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem;
+	LIRItem left(x->x(), this);
+	LIRItem right(x->y(), this);
+	// missing test if instr is commutative and if we should swap
+	right.load_nonconstant();
+	assert(right.is_constant() || right.is_register(), "wrong state of right");
+	left.load_item();
+	rlock_result(x);
+	if (is_div_rem) {
+		CodeEmitInfo* info = state_for(x);
+		LIR_Opr tmp =new_register(T_INT);
+		if (x->op() == Bytecodes::_irem) {
+			__ irem(left.result(), right.result(), x->operand(), tmp, info);
+		} else if (x->op() == Bytecodes::_idiv) {
+			__ idiv(left.result(), right.result(), x->operand(), tmp, info);
+		}
+	} else {
+		//arithmetic_op_int(x->op(), x->operand(), left.result(), 
+		//right.result(), FrameMap::G1_opr);
+
+		LIR_Opr tmp =new_register(T_INT);
+		arithmetic_op_int(x->op(), x->operand(), left.result(), right.result(), 
+				tmp);
+	}
+}
+
+
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  // when an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {
+    case floatTag:
+    case doubleTag:  do_ArithmeticOp_FPU(x);  return;
+    case longTag:    do_ArithmeticOp_Long(x); return;
+    case intTag:     do_ArithmeticOp_Int(x);  return;
+  }
+  ShouldNotReachHere();
+}
+
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  // the shift count must be in the fixed register returned by shiftCountOpr()
+  LIRItem value(x->x(), this);
+  LIRItem count(x->y(), this);
+
+  ValueTag elemType = x->type()->tag();
+  bool must_load_count = !count.is_constant() || elemType == longTag;
+  if (must_load_count) {
+    // count for long must be in register
+    count.load_item_force(shiftCountOpr());
+  } else {
+    count.dont_load_item();
+  }
+  value.load_item();
+  LIR_Opr reg = rlock_result(x);
+
+  shift_op(x->op(), reg, value.result(), count.result(), LIR_OprFact::illegalOpr);
+}
+
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+  // when an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+  right.load_nonconstant();
+  LIR_Opr reg = rlock_result(x);
+
+  logic_op(x->op(), reg, left.result(), right.result());
+}
+
+
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  ValueTag tag = x->x()->type()->tag();
+  if (tag == longTag) {
+    left.set_destroys_register();
+  }
+  left.load_item();
+  right.load_item();
+  LIR_Opr reg = rlock_result(x);
+
+  if (x->x()->type()->is_float_kind()) {
+    Bytecodes::Code code = x->op();
+    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+  } else if (x->x()->type()->tag() == longTag) {
+    __ lcmp2int(left.result(), right.result(), reg);
+  } else {
+    Unimplemented();
+  }
+}
+
+void LIRGenerator::do_AttemptUpdate(Intrinsic* x) {
+	assert(x->number_of_arguments() == 3, "wrong type");
+	LIRItem obj       (x->argument_at(0), this);  // AtomicLong object
+	LIRItem cmp_value (x->argument_at(1), this);  // value to compare with field
+	LIRItem new_value (x->argument_at(2), this);  
+	// replace field with new_value if it matches cmp_value
+
+	// the compare value must be in the A0/A1 long pair (edx,eax on x86);
+	// it may be destroyed by the compare-and-swap
+	//  cmp_value.load_item_force(FrameMap::eax_edx_long_opr);
+	cmp_value.load_item_force(FrameMap::_a0_a1_long_opr);
+
+	// the new value must be in the A2/A3 long pair (ecx,ebx on x86)
+	// new_value.load_item_force(FrameMap::ebx_ecx_long_opr);
+	new_value.load_item_force(FrameMap::_a2_a3_long_opr);
+	// object pointer register is overwritten with field address
+	obj.load_item();
+
+	// generate compare-and-swap; produces zero condition if swap occurs
+	int value_offset = sun_misc_AtomicLongCSImpl::value_offset();
+	LIR_Opr addr = obj.result();
+	__ add(addr, LIR_OprFact::intConst(value_offset), addr);
+	LIR_Opr t1 = LIR_OprFact::illegalOpr;  // no temp needed
+	LIR_Opr t2 = LIR_OprFact::illegalOpr;  // no temp needed
+	__ cas_long(addr, cmp_value.result(), new_value.result(), t1, t2);
+
+	// generate conditional move of boolean result
+	LIR_Opr result = rlock_result(x);
+	//__ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result);
+	__ move(FrameMap::_at_opr, result);
+}
+
+//FIXME: for mips, compare-and-swap is a bit different;
+//it is not yet clear which registers should be used here
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
+	assert(x->number_of_arguments() == 4, "wrong type");
+	LIRItem obj   (x->argument_at(0), this);  // object
+	LIRItem offset(x->argument_at(1), this);  // offset of field
+	LIRItem cmp   (x->argument_at(2), this);  // value to compare with field
+	LIRItem val   (x->argument_at(3), this);  // replace field with val if matches cmp
+
+	assert(obj.type()->tag() == objectTag, "invalid type");
+	assert(offset.type()->tag() == intTag, "invalid type");
+	assert(cmp.type()->tag() == type->tag(), "invalid type");
+	assert(val.type()->tag() == type->tag(), "invalid type");
+
+	// get address of field
+	obj.load_item();
+	offset.load_nonconstant();
+
+	if (type == objectType) {
+		//  cmp.load_item_force(FrameMap::eax_oop_opr);
+		cmp.load_item_force(FrameMap::_a0_oop_opr);
+		val.load_item();
+	} else if (type == intType) {
+		// cmp.load_item_force(FrameMap::eax_opr);
+		cmp.load_item_force(FrameMap::_a0_opr);
+		val.load_item();
+	} else if (type == longType) {
+		//// cmp.load_item_force(FrameMap::eax_edx_long_opr);
+		cmp.load_item_force(FrameMap::_a0_a1_long_opr);
+		// val.load_item_force(FrameMap::ebx_ecx_long_opr);
+		val.load_item_force(FrameMap::_a2_a3_long_opr);
+	} else {
+		ShouldNotReachHere();
+	}
+	LIR_Opr addr = new_register(T_OBJECT);
+	__ move(obj.result(), addr);
+	__ add(addr, offset.result(), addr);
+
+	LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
+	if (type == objectType) 
+		__ cas_obj(addr, cmp.result(), val.result(), ill, ill);
+	else if (type == intType)
+		__ cas_int(addr, cmp.result(), val.result(), ill, ill);
+	else if (type == longType)
+		__ cas_long(addr, cmp.result(), val.result(), ill, ill);
+	else {
+		ShouldNotReachHere();
+	}
+	// generate conditional move of boolean result
+	LIR_Opr result = rlock_result(x);
+	// cmove does not exist on mips,
+	// __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result);
+	// our implementation of cmpxchg puts the result in AT
+	//  LIR_Opr result = rlock_result_with_hint(x, hint());
+	__ move(FrameMap::_at_opr, result);
+
+	if (type == objectType)   // Write-barrier needed for Object fields.
+		write_barrier(addr);
+}
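+
+// Note: both AttemptUpdate and CompareAndSwap above rely on the port's
+// convention (see the comments in the code) that the cas_*/cmpxchg LIR ops
+// leave their 0/1 success flag in AT, which is why the boolean result is
+// produced with a plain move from AT rather than the x86-style cmove.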
+
+
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+	switch (x->id()) {
+		case vmIntrinsics::_dabs:
+		case vmIntrinsics::_dsqrt: {
+			assert(x->number_of_arguments() == 1, "wrong type");
+			LIRItem value(x->argument_at(0), this);
+			value.load_item();
+			LIR_Opr dst = rlock_result(x);
+
+			switch (x->id()) {
+				case vmIntrinsics::_dsqrt: {
+					__ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+					break;
+				}
+				case vmIntrinsics::_dabs: {
+					__ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+					break;
+				}
+			}
+			break;
+		}
+		case vmIntrinsics::_dlog10: // fall through
+		case vmIntrinsics::_dlog: // fall through
+		case vmIntrinsics::_dsin: // fall through
+		case vmIntrinsics::_dtan: // fall through
+		case vmIntrinsics::_dcos: {
+			assert(x->number_of_arguments() == 1, "wrong type");
+
+			address runtime_entry = NULL;
+			switch (x->id()) {
+				case vmIntrinsics::_dsin:
+					runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+					break;
+				case vmIntrinsics::_dcos:
+					runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+					break;
+				case vmIntrinsics::_dtan:
+					runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+					break;
+				case vmIntrinsics::_dlog:
+					runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+					break;
+				case vmIntrinsics::_dlog10:
+					runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+					break;
+				default:
+					ShouldNotReachHere();
+			}
+			LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
+			set_result(x, result);
+		}
+	}
+}
+
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
+	assert(x->number_of_arguments() == 5, "wrong type");
+	// Note: spill caller save before setting the item
+	LIRItem src     (x->argument_at(0), this);
+	LIRItem src_pos (x->argument_at(1), this);
+	LIRItem dst     (x->argument_at(2), this);
+	LIRItem dst_pos (x->argument_at(3), this);
+	LIRItem length  (x->argument_at(4), this);
+	// load all values in callee_save_registers, as this makes the
+	// parameter passing to the fast case simpler
+	src.load_item_force     (FrameMap::_t0_oop_opr);
+	src_pos.load_item_force (FrameMap::_a0_opr);
+	dst.load_item_force     (FrameMap::_a1_oop_opr);
+	dst_pos.load_item_force (FrameMap::_a2_opr);
+	length.load_item_force  (FrameMap::_a3_opr);
+
+	int flags;
+	ciArrayKlass* expected_type;
+	arraycopy_helper(x, &flags, &expected_type);
+
+	CodeEmitInfo* info = state_for(x, x->state());
+	__ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(),
+			length.result(), rlock_callee_saved(T_INT),
+			expected_type, flags, info);
+	set_no_result(x);
+}
+
+// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
+// _i2b, _i2c, _i2s
+LIR_Opr fixed_register_for(BasicType type) {
+	switch (type) {
+		case T_FLOAT:  return FrameMap::_f0_float_opr;
+		case T_DOUBLE: return FrameMap::_d0_double_opr;
+		case T_INT:    return FrameMap::_v0_opr;
+		case T_LONG:   return FrameMap::_v0_v1_long_opr;
+		default:       ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
+	}        
+}
+
+
+void LIRGenerator::do_Convert(Convert* x) {
+	// flags that vary for the different operations and different SSE-settings
+	bool fixed_input, fixed_result, round_result, needs_stub;
+
+	switch (x->op()) {
+		case Bytecodes::_i2l: // fall through
+		case Bytecodes::_l2i: // fall through
+		case Bytecodes::_i2b: // fall through
+		case Bytecodes::_i2c: // fall through
+		case Bytecodes::_i2s: 
+			fixed_input  = false;       
+			fixed_result = false;       
+			round_result = false;      
+			needs_stub   = false; break;
+		case Bytecodes::_f2d: 
+			fixed_input  = UseSSE == 1; 
+			fixed_result = false;       
+			round_result = false;      
+			needs_stub   = false; break;
+		case Bytecodes::_d2f: 
+			fixed_input  = false;       
+			fixed_result = UseSSE == 1; 
+			round_result = UseSSE < 1; 
+			needs_stub   = false; break;
+		case Bytecodes::_i2f: 
+			fixed_input  = false;       
+			fixed_result = false;       
+			round_result = UseSSE < 1; 
+			needs_stub   = false; break;
+		case Bytecodes::_i2d: 
+			fixed_input  = false;       
+			fixed_result = false;       
+			round_result = false;      
+			needs_stub   = false; break;
+		case Bytecodes::_f2i: 
+			fixed_input  = false;       
+			fixed_result = false;       
+			round_result = false;      
+			needs_stub   = true;  break;
+		case Bytecodes::_d2i: 
+			fixed_input  = false;       
+			fixed_result = false;       
+			round_result = false;      
+			needs_stub   = true;  break;
+		case Bytecodes::_l2f: 
+			fixed_input  = false;       
+			fixed_result = UseSSE >= 1; 
+			round_result = UseSSE < 1; 
+			needs_stub   = false; break;
+		case Bytecodes::_l2d: 
+			fixed_input  = false;       
+			fixed_result = UseSSE >= 2; 
+			round_result = UseSSE < 2; 
+			needs_stub   = false; break;
+		case Bytecodes::_f2l: 
+			fixed_input  = true;        
+			fixed_result = true;        	
+			round_result = false;      
+			needs_stub   = false; break;
+		case Bytecodes::_d2l: 
+			fixed_input  = true;        
+			fixed_result = true;        
+			round_result = false;      
+			needs_stub   = false; break;
+		default: ShouldNotReachHere();
+	}
+
+	LIRItem value(x->value(), this);
+	value.load_item();
+	LIR_Opr input = value.result();
+	LIR_Opr result = rlock(x);
+
+	// arguments of lir_convert
+	LIR_Opr conv_input = input;
+	LIR_Opr conv_result = result;
+	ConversionStub* stub = NULL;
+
+	if (fixed_input) {
+		conv_input = fixed_register_for(input->type());
+		__ move(input, conv_input);
+	}
+
+	assert(fixed_result == false || round_result == false, "cannot set both");
+	if (fixed_result) {
+		conv_result = fixed_register_for(result->type());
+	} else if (round_result) {
+		result = new_register(result->type());
+		set_vreg_flag(result, must_start_in_memory);
+	}
+
+	if (needs_stub) {
+		stub = new ConversionStub(x->op(), conv_input, conv_result);
+	}
+
+	__ convert(x->op(), conv_input, conv_result, stub);
+
+	if (result != conv_result) {
+		__ move(conv_result, result);
+	}
+
+	assert(result->is_virtual(), "result must be virtual register");
+	set_result(x, result);
+}
+
+void LIRGenerator::do_NewInstance(NewInstance* x) {
+	const LIR_Opr reg = result_register_for(x->type());
+	if (PrintNotLoaded && !x->klass()->is_loaded()) {
+		tty->print_cr("   ###class not loaded at new bci %d", x->bci());
+	}
+	CodeEmitInfo* info = state_for(x, x->state());
+//	LIR_Opr tmp1 = new_register(T_INT);
+//	LIR_Opr tmp2 = new_register(T_INT);
+//	LIR_Opr tmp3 = new_register(T_INT);
+//	LIR_Opr tmp4 = new_register(T_INT);
+	LIR_Opr klass_reg = FrameMap::_t4_oop_opr;
+//	new_instance(reg, x->klass(), FrameMap::_t0_oop_opr, FrameMap::_t1_oop_opr,FrameMap::_t2_oop_opr, LIR_OprFact::illegalOpr, klass_reg, info);
+	new_instance(reg, 
+		x->klass(), 
+		FrameMap::_t0_oop_opr, 
+		FrameMap::_t1_oop_opr,
+		FrameMap::_t2_oop_opr, 
+		FrameMap::_t3_oop_opr, 
+		FrameMap::_t5_oop_opr, 
+		FrameMap::_t6_oop_opr, 
+		klass_reg, 
+		info);
+	LIR_Opr result = rlock_result(x);
+	__ move(reg, result);
+}
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
+	CodeEmitInfo* info = state_for(x, x->state());
+	
+	LIRItem length(x->length(), this);
+	length.load_item_force(FrameMap::_t2_oop_opr);
+
+	LIR_Opr reg = result_register_for(x->type());
+	//LIR_Opr tmp1 = new_register(T_INT);
+	//LIR_Opr tmp2 = new_register(T_INT);
+	//LIR_Opr tmp3 = new_register(T_INT);
+	//LIR_Opr tmp4 = new_register(T_INT);
+	LIR_Opr tmp1 = FrameMap::_t0_oop_opr;
+	LIR_Opr tmp2 = FrameMap::_t1_oop_opr;
+	LIR_Opr tmp3 = FrameMap::_t3_oop_opr;
+	LIR_Opr tmp4 = FrameMap::_t5_oop_opr;
+	LIR_Opr tmp5 = FrameMap::_t6_oop_opr;
+//	LIR_Opr tmp4 = reg;
+	LIR_Opr klass_reg = FrameMap::_t4_oop_opr;
+	LIR_Opr len = length.result();
+	BasicType elem_type = x->elt_type();
+
+	__ oop2reg(ciTypeArrayKlass::make(elem_type)->encoding(), klass_reg);
+
+	CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
+	__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4,tmp5, elem_type, klass_reg, slow_path);
+
+	LIR_Opr result = rlock_result(x);
+	__ move(reg, result);
+}
+
+
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
+	LIRItem length(x->length(), this);
+	// in case of patching (i.e., object class is not yet loaded), we 
+	// need to reexecute the instruction
+	// and therefore provide the state before the parameters have been consumed
+	CodeEmitInfo* patching_info = NULL;
+	if (!x->klass()->is_loaded() || PatchALot) {
+		patching_info = state_for(x, x->state_before());
+	}
+	
+	const LIR_Opr reg = result_register_for(x->type());
+	//LIR_Opr tmp1 = new_register(T_INT);
+	//LIR_Opr tmp2 = new_register(T_INT);
+	//LIR_Opr tmp3 = new_register(T_INT);
+	//LIR_Opr tmp4 = new_register(T_INT);
+	LIR_Opr tmp1 = FrameMap::_t0_oop_opr;
+	LIR_Opr tmp2 = FrameMap::_t1_oop_opr;
+	LIR_Opr tmp3 = FrameMap::_t3_oop_opr;
+	LIR_Opr tmp4 = FrameMap::_t5_oop_opr;
+	LIR_Opr tmp5 = FrameMap::_t6_oop_opr;
+
+	LIR_Opr klass_reg = FrameMap::_t4_oop_opr;
+	length.load_item_force(FrameMap::_t2_oop_opr);
+
+	LIR_Opr len = length.result();
+	CodeEmitInfo* info = state_for(x, x->state());
+
+	CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+	ciObject* obj = (ciObject*) ciObjArrayKlass::make(x->klass());
+	if (obj == ciEnv::unloaded_ciobjarrayklass()) {
+		BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+	}
+	jobject2reg_with_patching(klass_reg, obj, patching_info);
+	__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, tmp5, T_OBJECT, klass_reg, slow_path);
+
+	LIR_Opr result = rlock_result(x);
+	__ move(reg, result);
+}
+
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
+	Values* dims = x->dims();
+	int i = dims->length();
+	LIRItemList* items = new LIRItemList(dims->length(), NULL);
+	while (i-- > 0) {
+		LIRItem* size = new LIRItem(dims->at(i), this);
+		items->at_put(i, size);
+	}
+
+	// need to get the info before, as the items may become invalid through item_free
+	CodeEmitInfo* patching_info = NULL;
+	if (!x->klass()->is_loaded() || PatchALot) {
+		patching_info = state_for(x, x->state_before());
+		// cannot re-use same xhandlers for multiple CodeEmitInfos, so
+		// clone all handlers.
+		x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+	}
+
+	CodeEmitInfo* info = state_for(x, x->state());
+
+	i = dims->length();
+	while (i-- > 0) {
+		LIRItem* size = items->at(i);
+		size->load_nonconstant();
+		store_stack_parameter(size->result(), in_ByteSize(i*4));
+	}
+
+	LIR_Opr reg = result_register_for(x->type());
+	jobject2reg_with_patching(reg, x->klass(), patching_info);
+
+	//  LIR_Opr rank = FrameMap::ebx_opr;
+	LIR_Opr rank = FrameMap::_t2_opr;
+	__ move(LIR_OprFact::intConst(x->rank()), rank);
+	//  LIR_Opr varargs = FrameMap::ecx_opr;
+	LIR_Opr varargs = FrameMap::_t0_opr;
+	__ move(FrameMap::_sp_opr, varargs);
+	LIR_OprList* args = new LIR_OprList(3);
+	args->append(reg);
+	args->append(rank);
+	args->append(varargs);
+	__ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+			LIR_OprFact::illegalOpr,
+			reg, args, info);
+	LIR_Opr result = rlock_result(x);
+	__ move(reg, result);
+}
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // nothing to do for now
+}
+
+
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+	LIRItem obj(x->obj(), this);
+
+	CodeEmitInfo* patching_info = NULL;
+	if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) {
+		// must do this before locking the destination register as an oop register,
+		// and before the obj is loaded (the latter is for deoptimization)
+		patching_info = state_for(x, x->state_before());
+	}
+	obj.load_item();
+
+	// info for exceptions
+	CodeEmitInfo* info_for_exception = state_for(x, x->state()->copy_locks());
+
+	CodeStub* stub;
+	if (x->is_incompatible_class_change_check()) {
+		assert(patching_info == NULL, "can't patch this");
+		stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception);
+	} else {
+		stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+	}
+	LIR_Opr reg = rlock_result(x);
+	__ checkcast(reg, obj.result(), x->klass(),
+			new_register(objectType), new_register(objectType),
+			!x->klass()->is_loaded() ? new_register(objectType) : LIR_OprFact::illegalOpr,
+			x->direct_compare(), info_for_exception, patching_info, stub,
+			x->profiled_method(), x->profiled_bci());
+}
+
+
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
+	LIRItem obj(x->obj(), this);
+
+	// result and test object may not be in same register
+	LIR_Opr reg = rlock_result(x);
+	CodeEmitInfo* patching_info = NULL;
+	if ((!x->klass()->is_loaded() || PatchALot)) {
+		// must do this before locking the destination register as an oop register
+		patching_info = state_for(x, x->state_before());
+	}
+	obj.load_item();
+	LIR_Opr tmp = new_register(objectType);
+	__ instanceof(reg, obj.result(), x->klass(),
+			tmp, new_register(objectType), LIR_OprFact::illegalOpr,
+			x->direct_compare(), patching_info);
+}
+
+
+void LIRGenerator::do_If(If* x) {
+	assert(x->number_of_sux() == 2, "inconsistency");
+	ValueTag tag = x->x()->type()->tag();
+	bool is_safepoint = x->is_safepoint();
+
+	If::Condition cond = x->cond();
+
+	LIRItem xitem(x->x(), this);
+	LIRItem yitem(x->y(), this);
+	LIRItem* xin = &xitem;
+	LIRItem* yin = &yitem;
+
+	if (tag == longTag) {
+		// for longs, only conditions "eql", "neq", "lss", "geq" are valid;
+		// mirror for other conditions
+		if (cond == If::gtr || cond == If::leq) {
+			cond = Instruction::mirror(cond);
+			xin = &yitem;
+			yin = &xitem;
+		}
+		xin->set_destroys_register();
+	}
+	xin->load_item();
+	if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && (cond == If::eql || cond == If::neq)) {
+		// inline long zero
+		yin->dont_load_item();
+	} else if (tag == longTag || tag == floatTag || tag == doubleTag) {
+		// longs cannot handle constants at right side
+		yin->load_item();
+	} else {
+		yin->dont_load_item();
+	}
+
+	// add safepoint before generating condition code so it can be recomputed
+	if (x->is_safepoint()) {
+		// increment backedge counter if needed
+		increment_backedge_counter(state_for(x, x->state_before()));
+
+		__ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
+	}
+	set_no_result(x);
+
+	LIR_Opr left = xin->result();
+	LIR_Opr right = yin->result();
+	//  __ cmp(lir_cond(cond), left, right);
+	profile_branch(x, cond, left, right);
+	move_to_phi(x->state());
+	if (x->x()->type()->is_float_kind()) {
+		__ branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux());
+	} else {
+		__ branch(lir_cond(cond), left, right, right->type(), x->tsux());
+	}
+	assert(x->default_sux() == x->fsux(), "wrong destination above");
+	__ jump(x->default_sux());
+}
+
+
+LIR_Opr LIRGenerator::getThreadPointer() {
+#ifdef _LP64
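+	// Note: r15_thread below is the x86_64 thread register; this branch
+	// appears to be carried over from the x86 code and is not taken by the
+	// 32-bit MIPS build, which uses get_thread() instead.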
+	return FrameMap::as_pointer_opr(r15_thread);
+#else
+	LIR_Opr result = new_register(T_INT);
+	__ get_thread(result);
+	return result;
+#endif //
+}
+
+void LIRGenerator::trace_block_entry(BlockBegin* block) {
+	store_stack_parameter(LIR_OprFact::intConst(block->block_id()), in_ByteSize(0));
+	LIR_OprList* args = new LIR_OprList();
+	address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry);
+	__ call_runtime_leaf(func, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, args);
+}
+
+
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
+		CodeEmitInfo* info) {
+	if (address->type() == T_LONG) {
+		__ volatile_store_mem_reg(value, address, info);
+	} else {
+		__ store(value, address, info);
+	}
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
+			CodeEmitInfo* info) {
+	
+	if (address->type() == T_LONG) {
+		__ volatile_load_mem_reg(address, result, info);
+	} else {
+		__ load(address, result, info);
+	}
+}
+
+
+void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset,
+		BasicType type, bool is_volatile) {
+	__ add(src, src, offset);
+	if (is_volatile && type == T_LONG) {
+		LIR_Address* addr = new LIR_Address(src, 0, T_DOUBLE);
+		LIR_Opr tmp = new_register(T_DOUBLE);
+		__ load(addr, tmp);
+		LIR_Opr spill = new_register(T_LONG);
+		set_vreg_flag(spill, must_start_in_memory);
+		__ move(tmp, spill);
+		__ move(spill, dst);
+	} else {
+		LIR_Address* addr = new LIR_Address(src, 0, type);
+		__ load(addr, dst);
+	}
+}
+
+
+void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
+		BasicType type, bool is_volatile) {
+	__ add(src, src, offset);
+	if (is_volatile && type == T_LONG) {
+		LIR_Address* addr = new LIR_Address(src, 0, T_DOUBLE);
+		LIR_Opr tmp = new_register(T_DOUBLE);
+		LIR_Opr spill = new_register(T_DOUBLE);
+		set_vreg_flag(spill, must_start_in_memory);
+		__ move(data, spill);
+		__ move(spill, tmp);
+		__ move(tmp, addr);
+
+	} else {
+		LIR_Address* addr = new LIR_Address(src, 0, type);
+		bool is_obj = (type == T_ARRAY || type == T_OBJECT);
+		if (is_obj) {
+			// Do the pre-write barrier, if any.
+			pre_barrier(LIR_OprFact::address(addr), false, NULL);
+			__ move(data, addr);
+			assert(src->is_register(), "must be register");
+			// Seems to be a precise address
+			post_barrier(LIR_OprFact::address(addr), data);
+		} else {
+			__ move(data, addr);
+		}
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_LinearScan_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_c1_LinearScan_mips.cpp.incl"
+
+
+//----------------------------------------------------------------------
+// Allocation of FPU stack slots (Intel x86 only)
+//----------------------------------------------------------------------
+
+void LinearScan::allocate_fpu_stack() {
+  Untested("mips");
+  // No FPU stack
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_LinearScan_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline bool LinearScan::is_processed_reg_num(int reg_num) {
+	return reg_num < 26 || reg_num > 30;
+}
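+// (registers 26..30 are excluded from allocation; assuming the linear-scan
+// numbering follows the hardware register order these are k0, k1, gp, sp
+// and fp)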
+
+inline int LinearScan::num_physical_regs(BasicType type) {
+	if (type == T_LONG || type== T_DOUBLE || type == T_FLOAT) {
+		return 2;
+	}
+	return 1;
+}
+
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
+	return false;
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {
+	assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+	// return true; // no callee-saved registers on Intel
+	//FIXME: unlike x86, MIPS does have callee-saved registers
+	return true;
+}
+
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+}
+
+
+// Implementation of LinearScanWalker
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
+	if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
+		assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
+		_first_reg = pd_first_callee_saved_reg;
+//		_first_reg = 8;
+		_last_reg = pd_last_callee_saved_reg;
+		return true;
+	} else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT) {
+//		_first_reg = pd_first_cpu_reg;
+		_first_reg = 8;
+		_last_reg = pd_last_allocatable_cpu_reg;
+		return true;
+	}
+	return false;
+}
+
+/*
+class FpuStackAllocator VALUE_OBJ_CLASS_SPEC {
+ private:
+  Compilation* _compilation;
+  LinearScan* _allocator;
+
+  LIR_OpVisitState visitor;
+
+  LIR_List* _lir;
+  int _pos;
+  FpuStackSim _sim;
+  FpuStackSim _temp_sim;
+
+  bool _debug_information_computed;
+
+  LinearScan*   allocator()                      { return _allocator; }
+  Compilation*  compilation() const              { return _compilation; }
+
+  // unified bailout support
+  void          bailout(const char* msg) const   { compilation()->bailout(msg); }
+  bool          bailed_out() const               { return compilation()->bailed_out(); }
+
+  int pos() { return _pos; }
+  void set_pos(int pos) { _pos = pos; }
+  LIR_Op* cur_op() { return lir()->instructions_list()->at(pos()); }
+  LIR_List* lir() { return _lir; }
+  void set_lir(LIR_List* lir) { _lir = lir; }
+  FpuStackSim* sim() { return &_sim; }
+  FpuStackSim* temp_sim() { return &_temp_sim; }
+
+  int fpu_num(LIR_Opr opr);
+  int tos_offset(LIR_Opr opr);
+  LIR_Opr to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset = false);
+
+  // Helper functions for handling operations
+  void insert_op(LIR_Op* op);
+  void insert_exchange(int offset);
+  void insert_exchange(LIR_Opr opr);
+  void insert_free(int offset);
+  void insert_free_if_dead(LIR_Opr opr);
+  void insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore);
+  void insert_copy(LIR_Opr from, LIR_Opr to);
+  void do_rename(LIR_Opr from, LIR_Opr to);
+  void do_push(LIR_Opr opr);
+  void pop_if_last_use(LIR_Op* op, LIR_Opr opr);
+  void pop_always(LIR_Op* op, LIR_Opr opr);
+  void clear_fpu_stack(LIR_Opr preserve);
+  void handle_op1(LIR_Op1* op1);
+  void handle_op2(LIR_Op2* op2);
+  void handle_opCall(LIR_OpCall* opCall);
+  void compute_debug_information(LIR_Op* op);
+  void allocate_exception_handler(XHandler* xhandler);
+  void allocate_block(BlockBegin* block);
+
+#ifndef PRODUCT
+  void check_invalid_lir_op(LIR_Op* op);
+#endif
+
+  // Helper functions for merging of fpu stacks
+  void merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg);
+  void merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot);
+  void merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim);
+  bool merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot);
+  void merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim);
+  void merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs);
+  bool merge_fpu_stack_with_successors(BlockBegin* block);
+
+ public:
+  LIR_Opr to_fpu_stack(LIR_Opr opr); // used by LinearScan for creation of debug information
+
+  FpuStackAllocator(Compilation* compilation, LinearScan* allocator);
+  void allocate();
+};
+*/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_MacroAssembler_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,467 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_c1_MacroAssembler_mips.cpp.incl"
+
+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr,Register scratch, Label& slow_case) {
+	const int aligned_mask = 3;
+	const int hdr_offset = oopDesc::mark_offset_in_bytes();
+
+	// hdr is just a temporary register; it cannot be AT, however
+	if ( hdr == NOREG ) {
+		hdr = T8;
+	}
+
+	assert_different_registers(hdr, obj, disp_hdr);
+	assert(BytesPerWord == 4, "adjust aligned_mask and code");
+	Label done;
+	/*
+	// The following move must be the first instruction emitted, since debug
+	// information may be generated for it.
+	// Load object header
+	lw(hdr, obj, hdr_offset);
+	*/
+	int null_check_offset = -1;
+	verify_oop(obj);
+
+	// save object being locked into the BasicObjectLock
+	sw(obj, disp_hdr, BasicObjectLock::obj_offset_in_bytes());
+	if (UseBiasedLocking) {
+		assert(scratch != noreg, "should have scratch register at this point");
+		null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, 
+				done, &slow_case);
+	} else {
+		null_check_offset = offset();
+	}
+
+	// Load object header
+	//   movl(hdr, Address(obj, hdr_offset));
+	lw(hdr,obj,hdr_offset); 
+	// and mark it as unlocked
+	//	ori(hdr, hdr, 1);
+	ori(hdr, hdr, markOopDesc::unlocked_value);
+	// save unlocked object header into the displaced header location on the stack
+	sw(hdr, disp_hdr, 0);
+
+	// test if object header is still the same (i.e. unlocked), and if so, store the
+	// displaced header address in the object header - if it is not the same, get the
+	// object header instead
+	//if (os::is_MP()) MacroAssembler::lock(); // must be immediately before cmpxchg!
+	cmpxchg(disp_hdr, Address(obj, hdr_offset), hdr);
+	// if the object header was the same, we're done
+	if (PrintBiasedLockingStatistics) {
+		//cond_incl(Assembler::equal, 
+		//Address((int) BiasedLocking::fast_path_entry_count_addr(), relocInfo::none));
+		// cond_incl(Assembler::equal, 
+		// Address((int) BiasedLocking::fast_path_entry_count_addr(), relocInfo::none));
+
+	}
+
+
+	bne(AT, ZERO, done);
+	delayed()->nop();
+	// if the object header was not the same, it is now in the hdr register
+	// => test if it is a stack pointer into the same stack (recursive locking), i.e.:
+	//
+	// 1) (hdr & aligned_mask) == 0
+	// 2) SP <= hdr
+	// 3) hdr <= SP + page_size
+	//
+	// these 3 tests can be done by evaluating the following expression:
+	//
+	// (hdr - SP) & (aligned_mask - page_size)
+	//
+	// assuming both the stack pointer and page_size have their least
+	// significant 2 bits cleared and page_size is a power of 2
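+	//
+	// Worked example (assuming a 4K page): aligned_mask - page_size
+	// = 3 - 4096 = 0xfffff003, so the expression is zero exactly when
+	// 0 <= hdr - SP < 4096 and the low two bits of (hdr - SP) are clear,
+	// i.e. hdr points into the current stack page and is word aligned.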
+	sub(hdr, hdr, SP);
+	move(AT, aligned_mask - os::vm_page_size());
+	andr(hdr, hdr, AT);
+	// for recursive locking, the result is zero => save it in the displaced header
+	// location (NULL in the displaced hdr location indicates recursive locking)
+	sw(hdr, disp_hdr, 0);
+	// otherwise we don't care about the result and handle locking via runtime call
+	bne(hdr, ZERO, slow_case);
+	delayed()->nop();
+	// done
+	bind(done);
+	return null_check_offset;
+}
+
+
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, 
+		Label& slow_case) {
+	const int aligned_mask = 3;
+	const int hdr_offset = oopDesc::mark_offset_in_bytes();
+
+	// hdr is just a temporary register; however, it cannot be AT
+	if ( hdr == NOREG ) {
+		hdr = T8;
+	}
+
+	assert_different_registers(hdr, obj, disp_hdr);
+	assert(BytesPerWord == 4, "adjust aligned_mask and code");
+	Label done;
+	if (UseBiasedLocking) {
+		// load object
+		//   movl(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+		lw(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+		biased_locking_exit(obj, hdr, done);
+	}
+
+
+
+	// load displaced header
+	lw(hdr, disp_hdr, 0);
+	// if the loaded hdr is NULL we had recursive locking
+	// if we had recursive locking, we are done
+	beq(hdr, ZERO, done);
+	delayed()->nop();
+	// load object
+	if(!UseBiasedLocking){
+		lw(obj, disp_hdr, BasicObjectLock::obj_offset_in_bytes());
+	}
+
+	verify_oop(obj);
+	// test if object header is pointing to the displaced header, and if so, restore
+	// the displaced header in the object - if the object header is not pointing to
+	// the displaced header, get the object header instead
+	//if (os::is_MP()) MacroAssembler::lock(); // must be immediately before cmpxchg!
+	cmpxchg(hdr, Address(obj, hdr_offset), disp_hdr);
+	// if the object header was not pointing to the displaced header,
+	// we do unlocking via runtime call
+	beq(AT, ZERO, slow_case);
+	delayed()->nop();
+	// done
+	bind(done);
+}
+
+
+
+// Defines obj, preserves var_size_in_bytes
+void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) {
+	if (UseTLAB) {
+		tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
+	} else {
+		eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
+	}
+}
+
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1 , Register t2) {
+	assert_different_registers(obj, klass, len, AT);
+
+	if (UseBiasedLocking && !len->is_valid()) {
+		assert_different_registers(obj, klass, len, t1, t2);
+		//movl(t1, Address(klass, Klass::prototype_header_offset_in_bytes() 
+		//+ klassOopDesc::klass_part_offset_in_bytes()));
+		lw(t1, klass, Klass::prototype_header_offset_in_bytes() 
+				+ klassOopDesc::klass_part_offset_in_bytes());
+		//movl(Address(obj, oopDesc::mark_offset_in_bytes()), t1);
+		sw(t1, obj, oopDesc::mark_offset_in_bytes());
+	} else {
+		move(AT, (int)markOopDesc::prototype());
+		sw(AT, obj, oopDesc::mark_offset_in_bytes());
+	}	
+	sw(klass, obj, oopDesc::klass_offset_in_bytes());
+	if (len->is_valid()) {
+		sw(len, obj, arrayOopDesc::length_offset_in_bytes());
+	}
+}
+
+// preserves obj, destroys len_in_bytes
+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+	Label done;
+	assert_different_registers(obj, t1, len_in_bytes);
+	assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, 
+			"header size is not a multiple of BytesPerWord");
+	Register index = len_in_bytes;
+	Register ptr = t1;
+
+	assert(is_simm16(hdr_size_in_bytes), "change this code");
+	addi(index, index, - hdr_size_in_bytes);
+	beq(index, ZERO, done);
+	delayed();
+
+	// initialize topmost word, divide index by 2, check if odd and test if zero
+	// note: for the remaining code to work, index must be a multiple of BytesPerWord
+#ifdef ASSERT
+	{ 
+		Label L;
+		andi(AT, index, BytesPerWord - 1);
+		beq(AT, ZERO, L);
+		delayed()->nop();
+		stop("index is not a multiple of BytesPerWord");
+		bind(L);
+	}
+#endif
+	// index could have been not a multiple of 8 (i.e., bit 2 was set)
+	{ 
+		Label even;
+		// note: if index was a multiple of 8, then it cannot
+		//       be 0 now otherwise it must have been 0 before
+		//       => if it is even, we don't need to check for 0 again
+		andi(AT, index, 4);
+		shr(index, 3);
+		sll(index, index, 3);
+		beq(AT, ZERO, even);
+		delayed()->add(ptr, obj, index);
+		// clear topmost word (no jump needed if conditional assignment would work here)
+		sw(ZERO, ptr, hdr_size_in_bytes); 
+		// index could be 0 now, need to check again
+		beq(index, ZERO, done);
+		delayed()->nop();
+		bind(even);
+	}
+	// initialize remaining object fields: index is a multiple of 2 now
+	{ 
+		Label loop;
+		bind(loop);
+		sw(ZERO, ptr, hdr_size_in_bytes - 1*BytesPerWord);
+		sw(ZERO, ptr, hdr_size_in_bytes - 2*BytesPerWord);
+
+		addi(index, index, - 2 * wordSize);
+		bne(index, ZERO, loop);
+		delayed()->addi(ptr, ptr, - 2 * wordSize);
+	}
+
+	// done
+	bind(done);
+}
+
+void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
+  //assert(obj == rax, "obj must be in rax, for cmpxchg");
+  assert(obj != t1 && obj != t2 && t1 != t2, "registers must be different"); // XXX really?
+  assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
+
+  try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
+
+  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+}
+
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+	assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
+			"con_size_in_bytes is not multiple of alignment");
+	const int hdr_size_in_bytes = instanceOopDesc::base_offset_in_bytes();
+
+	//  initialize_header(obj, klass, NOREG);
+	initialize_header(obj, klass, NOREG,t1,t2);
+
+	// clear rest of allocated space
+	const Register index = t2;
+	//FIXME: x86 changed the value in jdk6
+	// const int threshold = hdr_size_in_bytes + 36;
+	const int threshold = 6 * BytesPerWord;
+	// approximate break even point for code size (see comments below)
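+	// (with BytesPerWord == 4 the threshold is 24 bytes: constant sizes up
+	// to that are cleared with straight-line sw stores, larger constant
+	// sizes use the clearing loop further down)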
+	if (var_size_in_bytes != NOREG) {
+		move(index, var_size_in_bytes);
+		initialize_body(obj, index, hdr_size_in_bytes, t1);
+	} else if (con_size_in_bytes <= threshold) {
+		// use explicit null stores
+		// code size = 4*n bytes (n = number of fields to clear)
+		for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) {
+			sw(ZERO, obj, i);
+		}
+
+	} else if (con_size_in_bytes > hdr_size_in_bytes) {
+		// use loop to null out the fields
+		// code size = 32 bytes for even n (n = number of fields to clear)
+		// initialize last object field first if odd number of fields
+		assert( ((con_size_in_bytes - hdr_size_in_bytes) >> 3)!=0, "change code here");
+
+		move(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3);
+		sll(t1, index, 3);
+		add(t1, obj, t1);
+
+		// initialize last object field if constant size is odd
+		if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0) {
+			sw(ZERO, t1, hdr_size_in_bytes);
+		}
+		// initialize remaining object fields: index is a multiple of 2
+		{ 
+			Label loop;
+			bind(loop);
+			sw(ZERO, t1, hdr_size_in_bytes - (1*BytesPerWord));
+			sw(ZERO, t1, hdr_size_in_bytes - (2*BytesPerWord));
+			addi(index, index, -1);
+			bne(index, ZERO, loop);
+			delayed()->addi(t1, t1, - 2 * wordSize);
+		}
+	}
+
+	if (DTraceAllocProbes) {
+		//assert(obj == eax, "must be");
+		call(CAST_FROM_FN_PTR(address,
+	             Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)), relocInfo::runtime_call_type);
+		delayed()->nop(); 
+	}
+	verify_oop(obj);
+}
+
+void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, Register t3,
+					int header_size, int scale, Register klass, Label& slow_case) {
+	assert(obj == V0, "obj must be in V0 for cmpxchg");
+	assert_different_registers(obj, len, t1, t2, t3, klass, AT);
+
+	// determine alignment mask
+	assert(BytesPerWord == 4, "must be a multiple of 2 for masking code to work");
+
+	// check for negative or excessive length
+	//const int max_length = 0x00FFFFFF;
+	//move(AT, max_length);
+	move(AT, max_array_allocation_length);
+	sltu(AT, AT, len);
+	bne(AT, ZERO, slow_case);
+	delayed()->nop();
+
+	const Register arr_size = t3;
+	// align object end
+	move(arr_size, header_size * BytesPerWord + MinObjAlignmentInBytesMask);
+	sll(AT, len, scale);
+	add(arr_size, arr_size, AT);
+	move(AT, ~MinObjAlignmentInBytesMask);
+	andr(arr_size, arr_size, AT);
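+	// i.e. arr_size = align_up(header_size * BytesPerWord + (len << scale),
+	//                          MinObjAlignmentInBytes)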
+
+	try_allocate(obj, arr_size, 0, t1, t2, slow_case);
+
+	initialize_header(obj, klass, len,t1,t2);
+
+	// clear rest of allocated space
+	const Register len_zero = len;
+	initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
+	if (DTraceAllocProbes) {
+		// assert(obj == eax, "must be");
+		call(CAST_FROM_FN_PTR(address,
+		Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)),
+					relocInfo::runtime_call_type);
+		delayed()->nop();
+	}
+
+	verify_oop(obj);
+}
+
+
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
+	verify_oop(receiver);
+	// explicit NULL check not needed since load from [klass_offset] causes a trap  
+	// check against inline cache
+	assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
+	///cmpl(iCache, Address(receiver, oopDesc::klass_offset_in_bytes())); 
+	// if icache check fails, then jump to runtime routine
+	// Note: RECEIVER must still contain the receiver!
+	Label L;
+	lw(AT, receiver, oopDesc::klass_offset_in_bytes());
+	beq(AT, iCache, L);
+	delayed()->nop();
+	//	jmp(Runtime1::entry_for(Runtime1::handle_ic_miss_id), relocInfo::runtime_call_type);
+        jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
+	delayed()->nop();
+	bind(L);
+}
+
+void C1_MacroAssembler::method_exit(bool restore_frame) {
+	if (restore_frame) {
+		leave();
+	} 
+	jr(RA);
+	delayed()->nop();
+}
+
+
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes) {
+  // Make sure there is enough stack space for this method's activation.
+  // Note that we do this before doing an enter(). This matches the
+  // ordering of C2's stack overflow check / esp decrement and allows
+  // the SharedRuntime stack overflow handling to be consistent
+  // between the two compilers.
+  	generate_stack_overflow_check(frame_size_in_bytes);
+
+	enter();
+//FIXME
+#ifdef TIERED
+    // c2 leaves fpu stack dirty. Clean it on entry
+ //  if (UseSSE < 2 ) {
+       empty_FPU_stack();
+  //  }
+#endif // TIERED
+  
+  decrement(SP, frame_size_in_bytes); // does not emit code for frame_size == 0
+}
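+// The entry sequence above, as a sketch (enter() is assumed to save the caller's
+// FP/RA and establish the new FP, as on the other ports):
+//
+//   generate_stack_overflow_check(frame_size_in_bytes);  // bang stack pages first
+//   enter();                                             // save old FP, FP = SP
+//   SP -= frame_size_in_bytes;                           // allocate the C1 spill area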
+
+void C1_MacroAssembler::unverified_entry(Register receiver, Register ic_klass) {
+  if (C1Breakpoint) int3();
+  inline_cache_check(receiver, ic_klass);
+}
+
+
+void C1_MacroAssembler::verified_entry() {
+  if (C1Breakpoint) int3();
+  // build frame
+  verify_FPU(0, "method_entry");
+}
+
+
+#ifndef PRODUCT
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+	if (!VerifyOops) return;
+	//  verify_oop_addr(Address(esp, stack_offset));
+	verify_oop_addr(Address(SP, stack_offset));
+}
+
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+	if (!VerifyOops) return;
+	Label not_null;
+	// testl(r, r);
+	//jcc(Assembler::notZero, not_null);
+	bne(r,ZERO,not_null); 
+	delayed()->nop();  
+	stop("non-null oop required");
+	bind(not_null);
+	verify_oop(r);
+}
+
+void C1_MacroAssembler::invalidate_registers(bool inv_v0, bool inv_v1, bool inv_t3, bool inv_t7, bool inv_s0, bool inv_s7) {
+#ifdef ASSERT
+	/*  if (inv_eax) movl(eax, 0xDEAD);
+	    if (inv_ebx) movl(ebx, 0xDEAD);
+	    if (inv_ecx) movl(ecx, 0xDEAD);
+	    if (inv_edx) movl(edx, 0xDEAD);
+	    if (inv_esi) movl(esi, 0xDEAD);
+	    if (inv_edi) movl(edi, 0xDEAD);
+	    */
+	//if (inv_v0) move(V0, 0xDEAD);
+	//if (inv_v1) move(V1, 0xDEAD);
+	//if (inv_t3) move(T3, 0xDEAD);
+	//if (inv_t7) move(T7, 0xDEAD);
+	//if (inv_s0) move(S0, 0xDEAD);
+	//if (inv_s7) move(S7, 0xDEAD);
+#endif
+}
+#endif // ifndef PRODUCT
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_MacroAssembler_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,120 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// C1_MacroAssembler contains high-level macros for C1
+
+private:
+int _sp_offset;    // track sp changes
+// initialization
+void pd_init() { _sp_offset = 0; }
+
+public:
+void try_allocate(
+		Register obj,                      // result: pointer to object after successful allocation
+		Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+		int      con_size_in_bytes,        // object size in bytes if   known at compile time
+		Register t1,                       // temp register
+		Register t2,                       // temp register
+		Label&   slow_case                 // continuation point if fast allocation fails
+		);
+
+void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);
+void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1);
+
+// locking
+// swap    : register used to swap the header (the x86 "must be rax" constraint does not apply here), contents destroyed
+// obj     : must point to the object to lock, contents preserved
+// disp_hdr: must point to the displaced header location, contents preserved
+// scratch : scratch register, contents destroyed
+// returns code offset at which to add null check debug information
+int lock_object  (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case);
+
+// unlocking
+// swap    : contents destroyed
+// obj     : must point to the object to unlock, contents preserved
+// lock    : must point to the displaced header location, contents destroyed (the x86 "must be eax" constraint does not apply)
+void unlock_object(Register swap, Register obj, Register lock, Label& slow_case);
+
+void initialize_object(
+		Register obj,                      // result: pointer to object after successful allocation
+		Register klass,                    // object klass
+		Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+		int      con_size_in_bytes,        // object size in bytes if   known at compile time
+		Register t1,                       // temp register
+		Register t2                        // temp register
+		);
+
+// allocation of fixed-size objects
+// (can also be used to allocate fixed-size arrays, by setting
+// hdr_size correctly and storing the array length afterwards)
+// obj        : must be V0 (the result register), will contain pointer to allocated object
+// t1, t2     : scratch registers - contents destroyed
+// header_size: size of object header in words
+// object_size: total size of object in words
+// slow_case  : exit to slow case implementation if fast allocation fails
+void allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case);
+
+enum {
+	max_array_allocation_length = 0x00FFFFFF
+};
+
+// allocation of arrays
+// obj        : must be V0, will contain pointer to allocated object
+// len        : array length in number of elements
+// t1, t2, t3 : scratch registers - contents destroyed
+// header_size: size of object header in words
+// scale      : element scale factor (shift amount applied to len)
+// slow_case  : exit to slow case implementation if fast allocation fails
+void allocate_array(Register obj, Register len, Register t1, Register t2, Register t3, int header_size, int scale, Register klass, Label& slow_case);
+
+int  sp_offset() const { return _sp_offset; }
+void set_sp_offset(int n) { _sp_offset = n; }
+
+// Note: NEVER push values directly, but only through following push_xxx functions;
+//       This helps us to track the SP changes compared to the entry SP (->_sp_offset)
+
+void push_jint (jint i)     { _sp_offset++; move(AT, (int)i); push(AT); }
+void push_oop  (jobject o)  { ShouldNotReachHere(); _sp_offset++; move(AT, (int)o); push(AT);}
+// Seems to always be in wordSize
+void push_addr (Address a)  { _sp_offset++; addi(AT, a.base(), a.disp()); push(AT);}
+void push_reg  (Register r) { _sp_offset++; push(r); }
+void pop_reg   (Register r) { _sp_offset--; pop(r); assert(_sp_offset >= 0, "stack offset underflow"); }
+void super_pop(Register r) {MacroAssembler::pop(r);}
+
+void dec_stack (int nof_words) {
+	_sp_offset -= nof_words;
+	assert(_sp_offset >= 0, "stack offset underflow");
+	//addptr(rsp, wordSize * nof_words);
+	addi(SP, SP, wordSize * nof_words);
+}
+
+void dec_stack_after_call (int nof_words) {
+	_sp_offset -= nof_words;
+	assert(_sp_offset >= 0, "stack offset underflow");
+}
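+// A minimal usage sketch of the _sp_offset bookkeeping above (hypothetical caller
+// code, only using the push_/dec_stack interface declared in this file):
+//
+//   int before = sp_offset();
+//   push_reg(T0);            // _sp_offset == before + 1
+//   push_jint(42);           // _sp_offset == before + 2
+//   ...
+//   dec_stack(2);            // back to 'before'; asserts _sp_offset >= 0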
+
+void invalidate_registers(bool inv_rax, bool inv_rbx, bool inv_rcx, bool inv_rdx, bool inv_rsi, bool inv_rdi) PRODUCT_RETURN;
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_Runtime1_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,1465 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_c1_Runtime1_mips.cpp.incl"
+
+
+// Implementation of StubAssembler
+// this method will preserve the stack space for arguments as indicated by args_size.
+// For stack-alignment reasons you cannot pass arguments on the stack;
+// if you need more than 3 arguments, you must implement the call yourself.
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, int args_size) {
+	// I use S7 in place of x86's edi.
+	// setup registers
+	const Register thread = TREG; // is callee-saved register (Visual C++ calling conventions)
+	assert(!(oop_result1->is_valid() || oop_result2->is_valid()) || oop_result1 != oop_result2, "registers must be different");
+	assert(oop_result1 != thread && oop_result2 != thread, "registers must be different");
+	assert(args_size >= 0, "illegal args_size");
+
+	set_num_rt_args(1 + args_size);
+
+
+	// push java thread (becomes first argument of C function)
+#ifndef OPT_THREAD
+	get_thread(thread);
+#endif
+	move(A0, thread);
+
+	set_last_Java_frame(thread, NOREG, FP, NULL);
+	addi(SP, SP, - wordSize * (1+args_size));
+	move(AT, -8);
+	andr(SP, SP, AT);
+
+	relocate(relocInfo::internal_pc_type); 
+	{	
+		int save_pc = (int)pc() +  12 + NativeCall::return_address_offset;
+		lui(AT, Assembler::split_high(save_pc));
+		addiu(AT, AT, Assembler::split_low(save_pc));
+	}
+	sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); 
+
+	// do the call
+	lui(T9, Assembler::split_high((int)entry));
+	addiu(T9, T9, Assembler::split_low((int)entry));
+	jalr(T9);
+	delayed()->nop();
+	int call_offset = offset();
+
+	// verify callee-saved register
+#ifdef ASSERT
+	guarantee(thread != V0, "change this code");
+	push(V0);
+	{ 
+		Label L;
+		get_thread(V0);
+		beq(thread, V0, L);
+		delayed()->nop();
+		int3(); 
+		stop("StubAssembler::call_RT: edi not callee saved?");
+		bind(L);
+	}
+	super_pop(V0);
+#endif
+	// discard thread and arguments
+	lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); //by yyq
+	//FIXME: in the x86 version the second parameter is false, so why true here? @jerome, 12/31/06
+	//  reset_last_Java_frame(thread, true);
+	reset_last_Java_frame(thread, true, true);
+	// check for pending exceptions
+	{ 
+		Label L;
+		lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+		beq(AT, ZERO, L);
+		delayed()->nop();
+		// exception pending => remove activation and forward to exception handler
+		// make sure that the vm_results are cleared
+		if (oop_result1->is_valid()) {
+			sw(ZERO, thread, in_bytes(JavaThread::vm_result_offset()));
+		}
+		if (oop_result2->is_valid()) {
+			sw(ZERO, thread, in_bytes(JavaThread::vm_result_2_offset()));
+		}
+		// the leave() on x86 just pops ebp and leaves the return address on the
+		// top of the stack;
+		// the return address will be needed by forward_exception_entry()
+		if (frame_size() == no_frame_size) {
+			addiu(SP, FP, wordSize);
+			lw(FP, SP, (-1) * wordSize);
+			jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
+			delayed()->nop();
+		} else if (_stub_id == Runtime1::forward_exception_id) {
+			should_not_reach_here();
+		} else {
+			jmp(Runtime1::entry_for(Runtime1::forward_exception_id), 
+					relocInfo::runtime_call_type);
+			delayed()->nop(); 
+		}
+
+
+		bind(L);
+	}
+	// get oop results if there are any and reset the values in the thread
+	if (oop_result1->is_valid()) {
+		lw(oop_result1, thread, in_bytes(JavaThread::vm_result_offset()));
+		sw(ZERO, thread, in_bytes(JavaThread::vm_result_offset()));
+		verify_oop(oop_result1);
+	}
+	if (oop_result2->is_valid()) {
+		lw(oop_result2, thread, in_bytes(JavaThread::vm_result_2_offset()));
+		sw(ZERO, thread, in_bytes(JavaThread::vm_result_2_offset()));
+		verify_oop(oop_result2);
+	}
+	return call_offset;
+}
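+// The saved pc above is materialized with the usual MIPS hi/lo pattern (a sketch;
+// the exact rounding is whatever Assembler::split_high/split_low implement):
+//
+//   lui   AT, hi16(save_pc)        // upper half into bits 31..16
+//   addiu AT, AT, lo16(save_pc)    // add the sign-extended lower half
+//
+// where hi16() must pre-compensate for the sign extension of the addiu immediate.
+// The "+ 12" in save_pc apparently skips this lui/addiu pair plus the following sw
+// (3 instructions * 4 bytes), so that save_pc ends up at the return address of the
+// runtime call that follows.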
+
+
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1) {
+	///pushl(arg1);
+	if (arg1 != A1) move(A1, arg1);
+	return call_RT(oop_result1, oop_result2, entry, 1);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2) {
+	///pushl(arg2);
+	///pushl(arg1);
+	if (arg1!=A1) move(A1, arg1);
+	if (arg2!=A2) move(A2, arg2); assert(arg2 != A1, "smashed argument");
+	return call_RT(oop_result1, oop_result2, entry, 2);
+}
+
+
+int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address entry, Register arg1, Register arg2, Register arg3) {
+	///pushl(arg3);
+	///pushl(arg2);
+	///pushl(arg1);
+	if (arg1!=A1) move(A1, arg1);
+	if (arg2!=A2) move(A2, arg2); assert(arg2 != A1, "smashed argument");
+	if (arg3!=A3) move(A3, arg3); assert(arg3 != A1 && arg3 != A2, "smashed argument");			
+	return call_RT(oop_result1, oop_result2, entry, 3);
+}
+
+
+// Implementation of StubFrame
+
+class StubFrame: public StackObj {
+	private:
+		StubAssembler* _sasm;
+
+	public:
+		StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
+		void load_argument(int offset_in_words, Register reg);
+		~StubFrame();
+};
+
+
+#define __ _sasm->
+
+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
+	_sasm = sasm;
+	__ set_info(name, must_gc_arguments);
+	__ enter();
+}
+
+
+//FIXME: I have no idea about the frame layout on MIPS
+// load parameters that were stored with LIR_Assembler::store_parameter
+// Note: offsets for store_parameter and load_argument must match
+void StubFrame::load_argument(int offset_in_words, Register reg) {
+	//FP + 0: link
+	//   + 1: return address
+	//   + 2: argument with offset 0
+	//   + 3: argument with offset 1
+	//   + 4: ...
+	//__ movl(reg, Address(ebp, (offset_in_words + 2) * BytesPerWord));
+	__ lw(reg, Address(FP, (offset_in_words + 2) * BytesPerWord));
+}
+StubFrame::~StubFrame() {
+	__ leave();
+	__ jr(RA);
+	__ delayed()->nop();
+}
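+// Worked example of the layout documented in load_argument() (BytesPerWord == 4):
+//
+//   load_argument(0, V0)  ->  lw(V0, FP + 2*4)   // first stored parameter
+//   load_argument(1, V0)  ->  lw(V0, FP + 3*4)   // second stored parameter
+//
+// which matches how the monitorenter/monitorexit stubs below pull their arguments
+// out of the caller's frame.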
+
+#undef __
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+//static OopMap* save_live_registers(MacroAssembler* sasm, int num_rt_args);
+//static void restore_live_registers(MacroAssembler* sasm);
+//DeoptimizationBlob* SharedRuntime::_deopt_blob = NULL;
+/*
+const int fpu_stack_as_doubles_size_in_words = 16;
+const int fpu_stack_as_doubles_size = 64;
+*/
+const int float_regs_as_doubles_size_in_words = 16;
+//const int xmm_regs_as_doubles_size_in_words = 16;
+
+//FIXME, 
+// Stack layout for saving/restoring  all the registers needed during a runtime
+// call (this includes deoptimization)
+// Note: note that users of this frame may well have arguments to some runtime
+// while these values are on the stack. These positions neglect those arguments
+// but the code in save_live_registers will take the argument count into
+// account.
+//
+enum reg_save_layout {
+	//F0_off = 0,
+	//F31_off = F0_off + 31,
+	//T0_off = F31_off + 1,
+	T0_off = 0,
+//	T8_off = T0_off + 8,
+//	T9_off,
+	S0_off = T0_off + 8,
+	FP_off = S0_off + 8,
+	SP_off,
+	V0_off,
+	V1_off,
+/*	A0_off,
+	A1_off,
+	A2_off,
+	A3_off,*/
+	//temp_2_off,
+	temp_1_off,
+	saved_fp_off,
+	return_off,
+	reg_save_frame_size,
+	
+	// illegal instruction handler
+	continue_dest_off = temp_1_off,
+
+	// deoptimization equates
+	//deopt_type = temp_2_off,             // slot for type of deopt in progress
+	ret_type = temp_1_off                // slot for return type
+};
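+// With the enum above the save area is laid out (word offsets from SP) as:
+//
+//   T0_off .. T0_off+7 : T0..T7   (T0_off == 0)
+//   S0_off .. S0_off+7 : S0..S7   (S0_off == 8)
+//   FP_off, SP_off     : 16, 17
+//   V0_off, V1_off     : 18, 19
+//   temp_1_off, saved_fp_off, return_off : 20, 21, 22
+//   reg_save_frame_size : 23 words
+//
+// save_live_registers() below stores T0..T7, S0..S7, V0 and V1 at exactly these
+// slots, and the OopMaps record them at (offset + num_rt_args).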
+
+// Save off registers which might be killed by calls into the runtime.
+// Tries to be smart about FP registers.  In particular we separate
+// saving and describing the FPU registers for deoptimization since we
+// have to save the FPU registers twice if we describe them and on P4
+// saving FPU registers which don't contain anything appears
+// expensive.  The deopt blob is the only thing which needs to
+// describe FPU registers.  In all other cases it should be sufficient
+// to simply save their current value.
+//FIXME: I have no idea which registers should be saved. @jerome
+static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
+		bool save_fpu_registers = true, bool describe_fpu_registers = false) {
+	int frame_size = reg_save_frame_size + num_rt_args; // args + thread
+	sasm->set_frame_size(frame_size);
+
+	// record saved value locations in an OopMap
+	// locations are offsets from sp after runtime call; num_rt_args is number of arguments 
+	// in call, including thread
+	OopMap* map = new OopMap(frame_size, 0);
+	/*  map->set_callee_saved(VMRegImpl::stack2reg(eax_off + num_rt_args), eax->as_VMReg());
+	    map->set_callee_saved(VMRegImpl::stack2reg(ecx_off + num_rt_args), ecx->as_VMReg());
+	    map->set_callee_saved(VMRegImpl::stack2reg(edx_off + num_rt_args), edx->as_VMReg());
+	    map->set_callee_saved(VMRegImpl::stack2reg(ebx_off + num_rt_args), ebx->as_VMReg());
+	    map->set_callee_saved(VMRegImpl::stack2reg(esi_off + num_rt_args), esi->as_VMReg());
+	    map->set_callee_saved(VMRegImpl::stack2reg(edi_off + num_rt_args), edi->as_VMReg());
+	    */
+	map->set_callee_saved(VMRegImpl::stack2reg(V1_off + num_rt_args), V1->as_VMReg());
+	map->set_callee_saved(VMRegImpl::stack2reg(V0_off + num_rt_args), V0->as_VMReg());
+	map->set_callee_saved(VMRegImpl::stack2reg(T0_off + num_rt_args), T0->as_VMReg());
+	return map;
+}
+
+#if 0
+static void print_live_registers(StubAssembler* sasm)
+{
+	__ pushad();
+	__ addiu(SP, SP, -1 * wordSize);
+	for(int i = 0; i < 32; i++)
+	{
+		__ move(A0,(Register)(i));
+		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int));
+		__ delayed()->nop();
+	}
+
+	__ addiu(SP, SP, wordSize);
+	__ popad();
+}
+#endif
+
+//FIXME: is it enough to save these registers?  by yyq
+static OopMap* save_live_registers(StubAssembler* sasm, 
+                                   int num_rt_args,
+		                   bool save_fpu_registers = true, 
+                                   bool describe_fpu_registers = false) {
+  //const int reg_save_frame_size = return_off + 1 + num_rt_args;
+  __ block_comment("save_live_registers");
+  int frame_size = reg_save_frame_size + num_rt_args; // args + thread //by yyq
+  sasm->set_frame_size(frame_size);
+  // save all register state - int, fpu  
+  __ addi(SP, SP, -(reg_save_frame_size-2)* wordSize);
+  
+  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
+    __ sw(r, SP, (r->encoding() - T0->encoding() + T0_off) * wordSize);
+  }
+  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
+    __ sw(r, SP, (r->encoding() - S0->encoding() + S0_off) * wordSize);
+  }
+  __ sw(V0, SP, V0_off * wordSize);
+  __ sw(V1, SP, V1_off * wordSize);	
+  
+  // save all fp data registers in double-precision format for use in possible deoptimization;
+  // must first restore FPUStatusWord that was initialized by push_FPU_state 
+  // (fnsave instruction)
+  
+  // record saved value locations in an OopMap
+  // locations are offsets from sp after runtime call;
+  // num_rt_args is number of arguments in call including thread
+  
+  // locate the stack base for the register save area
+  //  const int base = SharedInfo::stack0 + num_rt_args;
+  
+  OopMap* map = new OopMap(reg_save_frame_size, 0);
+  
+  map->set_callee_saved(VMRegImpl::stack2reg(V0_off + num_rt_args), V0->as_VMReg());
+  map->set_callee_saved(VMRegImpl::stack2reg(V1_off + num_rt_args), V1->as_VMReg());
+  int i = 0;
+  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
+    map->set_callee_saved(VMRegImpl::stack2reg(T0_off + num_rt_args + i++), r->as_VMReg());
+  }
+  i = 0;
+  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
+    map->set_callee_saved(VMRegImpl::stack2reg(S0_off + num_rt_args + i++), r->as_VMReg());
+  }
+  
+  return map;
+}
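+// The loops above walk the register file via Register::successor(); assuming
+// T0..T7 and S0..S7 have consecutive encodings (as on MIPS, $8..$15 and $16..$23),
+// they expand to the straight-line sequence (sketch):
+//
+//   sw(T0, SP, (T0_off + 0) * wordSize);  ...  sw(T7, SP, (T0_off + 7) * wordSize);
+//   sw(S0, SP, (S0_off + 0) * wordSize);  ...  sw(S7, SP, (S0_off + 7) * wordSize);
+//
+// restore_fpu()/restore_live_registers() below perform the mirror-image loads and
+// then pop the same (reg_save_frame_size - 2) words off SP.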
+
+static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  //static void restore_live_registers(MacroAssembler* sasm) {
+  for (Register r = T0; r != T7->successor(); r = r->successor() ) {
+    __ lw(r, SP, (r->encoding() - T0->encoding() + T0_off) * wordSize);
+  }
+  for (Register r = S0; r != S7->successor(); r = r->successor() ) {
+    __ lw(r, SP, (r->encoding() - S0->encoding() + S0_off) * wordSize);
+  }
+  __ lw(V0, SP, V0_off * wordSize);
+  __ lw(V1, SP, V1_off * wordSize);	
+  __ addiu(SP, SP, (reg_save_frame_size - 2) * wordSize);
+}
+
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  __ block_comment("restore_live_registers");
+  restore_fpu(sasm, restore_fpu_registers);
+}
+
+static void restore_live_registers_except_V0(StubAssembler* sasm, bool restore_fpu_registers = true) {	
+   //static void restore_live_registers(MacroAssembler* sasm) {
+   //FIXME: maybe V1 needs to be saved too
+   __ block_comment("restore_live_registers except V0");
+   for (Register r = T0; r != T7->successor(); r = r->successor() ) {
+   	__ lw(r, SP, (r->encoding() - T0->encoding() + T0_off) * wordSize);
+   }
+   for (Register r = S0; r != S7->successor(); r = r->successor() ) {
+   	__ lw(r, SP, (r->encoding() - S0->encoding() + S0_off) * wordSize);
+   }
+   __ lw(V1, SP, V1_off * wordSize);	
+   __ addiu(SP, SP, (reg_save_frame_size - 2) * wordSize);
+}
+void Runtime1::initialize_pd() {
+  // nothing to do
+}
+
+// target: the entry point of the method that creates and posts the exception oop
+// has_argument: true if the exception needs an argument (passed on stack because registers must be preserved)
+//OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+	// preserve all registers
+	int num_rt_args = has_argument ? 2 : 1;
+//	OopMap* oop_map = save_live_registers(sasm, num_rt_args);
+	OopMap* oop_map = save_live_registers(sasm, 0);
+
+	// now all registers are saved and can be used freely
+	// verify that no old value is used accidentally
+	// all registers are saved; I think MIPS does not need this
+	// __ invalidate_registers(true, true, true, true, true, true);
+
+	// registers used by this stub
+	//  const Register temp_reg = ebx;
+	const Register temp_reg = T3; 
+	// load argument for exception that is passed as an argument into the stub
+	if (has_argument) {
+		//  __ movl(temp_reg, Address(ebp, 2*BytesPerWord));
+		__ lw(temp_reg, Address(FP, 2*BytesPerWord));
+		//__ pushl(temp_reg);
+	//	__ push(temp_reg);
+	}
+	int call_offset;
+	if (has_argument) 
+	 	call_offset = __ call_RT(noreg, noreg, target, temp_reg);
+        else
+	 	call_offset = __ call_RT(noreg, noreg, target);
+	
+	OopMapSet* oop_maps = new OopMapSet();
+	oop_maps->add_gc_map(call_offset, oop_map);
+
+	__ stop("should not reach here");
+
+	return oop_maps;
+}
+
+//FIXME: I do not know which register to use; should use T3 as real_return_addr @jerome
+void Runtime1::generate_handle_exception(StubAssembler *sasm, OopMapSet* oop_maps, OopMap* oop_map, bool save_fpu_registers) {
+	// incoming parameters
+	// const Register exception_oop = eax;
+	const Register exception_oop = V0;
+	//  const Register exception_pc = edx;
+	const Register exception_pc = V1;
+	// other registers used in this stub
+	// const Register real_return_addr = ebx;
+	const Register real_return_addr = T3;
+	// const Register thread = edi;
+	const Register thread = S6;
+
+	__ block_comment("generate_handle_exception");
+
+#ifdef TIERED
+	// C2 can leave the fpu stack dirty
+	// if (UseSSE < 2 ) {
+	__ empty_FPU_stack();
+	//}
+#endif // TIERED
+
+	// verify that only eax and edx are valid at this time
+	// for MIPS, I think this is not required
+	// __ invalidate_registers(false, true, true, false, true, true);
+	// verify that eax contains a valid exception
+	__ verify_not_null_oop(exception_oop);
+
+	// load address of JavaThread object for thread-local data
+	__ get_thread(thread);
+
+#ifdef ASSERT
+	// check that fields in JavaThread for exception oop and issuing pc are 
+	// empty before writing to them
+	Label oop_empty;
+	//__ cmpl(Address(thread, JavaThread::exception_oop_offset()), 0);
+	//__ jcc(Assembler::equal, oop_empty);
+	__ lw(AT,Address(thread, in_bytes(JavaThread::exception_oop_offset()))); 
+	__ beq(AT,ZERO,oop_empty); 
+	__ delayed()->nop(); 
+	__ stop("exception oop already set");
+	__ bind(oop_empty);
+	Label pc_empty;
+	//  __ cmpl(Address(thread, JavaThread::exception_pc_offset()), 0);
+	// __ jcc(Assembler::equal, pc_empty);
+	__ lw(AT,Address(thread, in_bytes(JavaThread::exception_pc_offset()))); 
+	__ beq(AT,ZERO,pc_empty); 
+	__ delayed()->nop(); 
+	__ stop("exception pc already set");
+	__ bind(pc_empty);
+#endif
+
+	// save exception oop and issuing pc into JavaThread
+	// (exception handler will load it from here)
+	//__ movl(Address(thread, JavaThread::exception_oop_offset()), exception_oop);
+	__ sw(exception_oop,Address(thread, in_bytes(JavaThread::exception_oop_offset())));
+	//__ movl(Address(thread, JavaThread::exception_pc_offset()), exception_pc);
+	__ sw(exception_pc,Address(thread, in_bytes(JavaThread::exception_pc_offset())));
+
+	// save real return address (pc that called this stub)
+	//  __ movl(real_return_addr, Address(ebp, 1*BytesPerWord));   
+	//__ lw(real_return_addr, Address(ebp, 1*BytesPerWord));   
+	__ lw(real_return_addr, FP, 1*BytesPerWord);   
+	// __ movl(Address(esp, temp_1_off * BytesPerWord), real_return_addr);
+	__ sw(real_return_addr,SP, temp_1_off * BytesPerWord);
+
+	// patch throwing pc into return address (has bci & oop map)
+	//__ movl(Address(ebp, 1*BytesPerWord), exception_pc);       
+	__ sw(exception_pc,FP, 1*BytesPerWord);       
+	// compute the exception handler. 
+	// the exception oop and the throwing pc are read from the fields in JavaThread
+	int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
+				exception_handler_for_pc));
+	oop_maps->add_gc_map(call_offset, oop_map);
+	// eax: handler address or NULL if no handler exists
+	//      will be the deopt blob if nmethod was deoptimized while we looked up
+	//      handler regardless of whether handler existed in the nmethod.
+
+	// only eax is valid at this time, all other registers have been destroyed by the 
+	// runtime call
+	//  __ invalidate_registers(false, true, true, true, true, true);
+
+	// Do we have an exception handler in the nmethod?
+	Label no_handler;
+	Label done;
+	//  __ testl(eax, eax);
+	//  __ jcc(Assembler::zero, no_handler);
+	__ beq(exception_oop,ZERO,no_handler);
+	__ delayed()->nop(); 
+	// exception handler found
+	// patch the return address -> the stub will directly return to the exception handler
+	// __ movl(Address(ebp, 1*BytesPerWord), eax); 
+	__ sw(exception_oop, FP, 1*BytesPerWord); 
+
+	// restore registers
+	restore_live_registers(sasm, save_fpu_registers);
+
+	// return to exception handler
+	__ leave();
+	//__ ret(0);
+	__ jr(RA);
+	__ delayed()->nop(); 
+	__ bind(no_handler);
+	// no exception handler found in this method, so the exception is 
+	// forwarded to the caller (using the unwind code of the nmethod)
+	// there is no need to restore the registers
+
+	// restore the real return address that was saved before the RT-call
+	//__ movl(real_return_addr, Address(esp, temp_1_off * BytesPerWord));
+	//__ movl(Address(ebp, 1*BytesPerWord), real_return_addr);
+	__ lw(real_return_addr,SP, temp_1_off * BytesPerWord);
+	__ sw(real_return_addr, FP, 1*BytesPerWord); 
+	// load address of JavaThread object for thread-local data
+	__ get_thread(thread);
+	// restore exception oop into eax (convention for unwind code)
+	//  __ movl(exception_oop, Address(thread, JavaThread::exception_oop_offset()));
+	__ lw(exception_oop, thread, in_bytes(JavaThread::exception_oop_offset()));
+
+	// clear exception fields in JavaThread because they are no longer needed
+	// (fields must be cleared because they are processed by GC otherwise)
+	// __ movl(Address(thread, JavaThread::exception_oop_offset()), NULL_WORD);
+	//  __ movl(Address(thread, JavaThread::exception_pc_offset()), NULL_WORD);
+	__ sw(ZERO,thread, in_bytes(JavaThread::exception_oop_offset()));
+	__ sw(ZERO,thread, in_bytes(JavaThread::exception_pc_offset())); 
+	// pop the stub frame off
+	__ leave();
+	//__addiu(SP, FP, wordSize);
+	//__lw(FP, SP, (-1) * wordSize);
+	generate_unwind_exception(sasm);
+	__ stop("should not reach here");
+}
+
+
+
+
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
+	// incoming parameters
+	const Register exception_oop = V0;
+	// other registers used in this stub
+	const Register exception_pc = V1;
+	const Register handler_addr = T3;
+	const Register thread = S6;
+
+	// verify that only eax is valid at this time
+	//  __ invalidate_registers(false, true, true, true, true, true);
+
+#ifdef ASSERT
+	// check that fields in JavaThread for exception oop and issuing pc are empty
+	__ get_thread(thread);
+	Label oop_empty;
+	//  __ cmpl(Address(thread, JavaThread::exception_oop_offset()), 0);
+	__ lw(AT, thread, in_bytes(JavaThread::exception_oop_offset())); 
+	//__ jcc(Assembler::equal, oop_empty);
+	__ beq(AT,ZERO,oop_empty); 
+	__ delayed()->nop(); 
+	__ stop("exception oop must be empty");
+	__ bind(oop_empty);
+
+	Label pc_empty;
+	// __ cmpl(Address(thread, JavaThread::exception_pc_offset()), 0);
+	__ lw(AT, thread, in_bytes(JavaThread::exception_pc_offset())); 
+	//__ jcc(Assembler::equal, pc_empty);
+	__ beq(AT,ZERO, pc_empty); 
+	__ delayed()->nop(); 
+	__ stop("exception pc must be empty");
+	__ bind(pc_empty);
+#endif
+	// clear the FPU stack in case any FPU results are left behind
+	__ empty_FPU_stack();
+
+	// leave activation of nmethod
+	__ addi(SP, FP, wordSize);	
+	__ lw(FP, SP, - 4);
+	// store return address (is on top of stack after leave)
+	// __ movl(exception_pc, Address(esp));
+	__ lw(exception_pc,SP,0);
+	__ verify_oop(exception_oop);
+
+	// save exception oop from eax to stack before call
+	// __ pushl(exception_oop);
+	__ push(exception_oop);
+	// search the exception handler address of the caller (using the return address)
+	__ call_VM_leaf(CAST_FROM_FN_PTR(address, 
+			SharedRuntime::exception_handler_for_return_address), exception_pc);
+	// eax: exception handler address of the caller
+
+	// only eax is valid at this time, all other registers have been destroyed by the call
+	// __ invalidate_registers(false, true, true, true, true, true);
+
+	// move result of call into correct register
+	//__ movl(handler_addr, eax);
+	__ move(handler_addr, V0);
+	// restore exception oop in eax (required convention of exception handler)
+	// __ popl(exception_oop);
+	__ super_pop(exception_oop);
+
+	__ verify_oop(exception_oop);
+
+	// get throwing pc (= return address).
+	// edx has been destroyed by the call, so it must be set again
+	// the pop is also necessary to simulate the effect of a ret(0)
+	// __ popl(exception_pc);
+	__  super_pop(exception_pc);
+	// verify that there is really a valid exception in eax
+	__ verify_not_null_oop(exception_oop);
+
+	// continue at exception handler (return address removed)
+	// note: do *not* remove arguments when unwinding the
+	//       activation since the caller assumes having
+	//       all arguments on the stack when entering the
+	//       runtime to determine the exception handler
+	//       (GC happens at call site with arguments!)
+	// eax: exception oop
+	// edx: throwing pc
+	// ebx: exception handler
+	//  __ jmp(handler_addr);
+	__ jr(handler_addr);
+	__ delayed()->nop();
+}
+
+
+
+
+//static address deopt_with_exception_entry_for_patch = NULL;
+
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+ 
+	// use the maximum number of runtime-arguments here because it is difficult to 
+	// distinguish each RT-Call.
+	// Note: This number affects also the RT-Call in generate_handle_exception because
+	//       the oop-map is shared for all calls.
+	//FIXME: for MIPS, I do not think this is needed
+
+	const int num_rt_args = 1;  // thread
+	// const int num_rt_args = 2;  // for x86 version, thread + dummy ,push (eax)
+
+	DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+	assert(deopt_blob != NULL, "deoptimization blob must have been created");
+	// assert(deopt_with_exception_entry_for_patch != NULL, 
+	// "deoptimization blob must have been created");
+
+	//OopMap* oop_map = save_live_registers(sasm, num_rt_args);
+	OopMap* oop_map = save_live_registers(sasm, 0);
+#ifndef OPT_THREAD
+	const Register thread = T8; 
+	// push java thread (becomes first argument of C function)
+	__ get_thread(thread);
+#else
+	const Register thread = TREG;
+#endif
+	__ move(A0, thread);
+
+/*
+ *	NOTE: this frame should be a compiled frame, but at this point the pc in the frame
+ *	anchor still points into the interpreter. That is wrong and should be cleared, but is not.
+ *	Even if we cleared the wrong pc in the anchor, the default way of getting the caller pc
+ *	for this frame class would still be wrong: it assumes the caller pc is stored at *(sp - 1),
+ *	which is not the case here.
+ */
+	__ set_last_Java_frame(thread, NOREG, FP, NULL);
+	__ addi(SP, SP, (-1) * wordSize);
+	__ move(AT, -8);
+	__ andr(SP, SP, AT);
+	__ relocate(relocInfo::internal_pc_type); 
+	{	
+		int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+		__ lui(AT, Assembler::split_high(save_pc));
+		__ addiu(AT, AT, Assembler::split_low(save_pc));
+	}
+	__ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); 
+
+	// do the call
+	__ lui(T9, Assembler::split_high((int)target));	
+	__ addiu(T9, T9, Assembler::split_low((int)target));
+	__ jalr(T9);
+	__ delayed()->nop();
+	OopMapSet*  oop_maps = new OopMapSet();
+	oop_maps->add_gc_map(__ offset(),  oop_map);
+
+#ifndef OPT_THREAD
+	__ get_thread(thread);
+#endif
+    
+	__ lw (SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+	//  __ reset_last_Java_frame(thread, true);
+	__ reset_last_Java_frame(thread, true,true);
+	// discard thread arg
+//	__ addi(SP, SP, 1 * wordSize);
+	// check for pending exceptions
+	{ 
+		Label L, skip;
+		//Label no_deopt;
+		__ lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+		__ beq(AT, ZERO, L);
+		__ delayed()->nop();
+		// exception pending => remove activation and forward to exception handler
+
+		//		__ beq(V0, ZERO, no_deopt);	// have we deoptimized?
+		__ bne(V0,ZERO, skip);	
+		__ delayed()->nop();	
+		///	__ beq(V0, ZERO, Runtime1::entry_for(Runtime1::forward_exception_id),
+		//			relocInfo::runtime_call_type);
+		__ jmp(Runtime1::entry_for(Runtime1::forward_exception_id), 
+				relocInfo::runtime_call_type); 
+		__ delayed()->nop(); 	
+		__ bind(skip);	
+
+		// the deopt blob expects exceptions in the special fields of
+		// JavaThread, so copy and clear pending exception.
+
+		// load and clear pending exception
+		// __ movl(eax, Address(thread, Thread::pending_exception_offset()));
+		__ lw(V0, Address(thread,in_bytes(Thread::pending_exception_offset())));
+		//__ movl(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
+		__ sw(ZERO,Address(thread, in_bytes(Thread::pending_exception_offset())));
+
+		// check that there is really a valid exception 
+		//__ verify_not_null_oop(eax);
+		__ verify_not_null_oop(V0);
+
+		// load throwing pc: this is the return address of the stub
+		// __ movl(edx, Address(esp, return_off * BytesPerWord));
+		__ lw(V1, Address(SP, return_off * BytesPerWord));
+
+
+#ifdef ASSERT
+		// check that fields in JavaThread for exception oop and issuing pc are empty
+		Label oop_empty;
+		// __ cmpl(Address(thread, JavaThread::exception_oop_offset()), 0);
+		//__ jcc(Assembler::equal, oop_empty);
+		__ lw(AT, Address(thread, in_bytes(JavaThread::exception_oop_offset()))); 
+		__ beq(AT,ZERO,oop_empty); 
+		__ delayed()->nop(); 
+		__ stop("exception oop must be empty");
+		__ bind(oop_empty);
+
+		Label pc_empty;
+		// __ cmpl(Address(thread, JavaThread::exception_pc_offset()), 0);
+		//__ jcc(Assembler::equal, pc_empty);
+		__ lw(AT, Address(thread, in_bytes(JavaThread::exception_pc_offset()))); 
+		__ beq(AT,ZERO,pc_empty); 
+		__ delayed()->nop(); 
+		__ stop("exception pc must be empty");
+		__ bind(pc_empty);
+#endif
+
+		// store exception oop and throwing pc to JavaThread
+		//    __ movl(Address(thread, JavaThread::exception_oop_offset()), eax);
+		__ sw(V0,Address(thread, in_bytes(JavaThread::exception_oop_offset())));
+		//__ movl(Address(thread, JavaThread::exception_pc_offset()), edx);
+		__ sw(V1,Address(thread, in_bytes(JavaThread::exception_pc_offset())));
+
+		restore_live_registers(sasm);
+
+		__ leave();
+		// __ addl(esp, 4);  // remove return address from stack
+		//__ addi(SP,SP, 4);  // remove return address from stack
+
+		// Forward the exception directly to the deopt blob. We can blow no
+		// registers and must leave the throwing pc on the stack.  A patch may
+		// have values live in registers, so we use the entry point that takes
+		// the exception in TLS.
+		__ jmp(deopt_blob->unpack_with_exception_in_tls(), relocInfo::runtime_call_type);
+		__ delayed()->nop();
+		  
+		__ bind(L);
+	}
+
+
+	// Runtime will return true if the nmethod has been deoptimized during
+	// the patching process. In that case we must do a deopt reexecute instead.
+
+	Label reexecuteEntry, cont;
+
+	__ beq(V0, ZERO, cont);                              // have we deoptimized?
+	__ delayed()->nop();
+
+	// Will reexecute. Proper return address is already on the stack we just restore
+	// registers, pop all of our frame but the return address and jump to the deopt blob
+	restore_live_registers(sasm);
+	__ leave();
+	__ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+
+	__ bind(cont);
+	restore_live_registers(sasm);
+	__ leave();
+	__ jr(RA);
+	__ delayed()->nop();
+
+	return oop_maps;
+}
+
+
+//OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm, int* frame_size) {
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+	// for better readability
+	const bool must_gc_arguments = true;
+	const bool dont_gc_arguments = false;
+
+
+	// default value; overwritten for some optimized stubs that are called 
+	// from methods that do not use the fpu
+	bool save_fpu_registers = true;
+
+
+	// stub code & info for the different stubs
+	OopMapSet* oop_maps = NULL;
+
+  switch (id) {
+    case forward_exception_id:
+      {
+        // we're handling an exception in the context of a compiled
+        // frame.  The registers have been saved in the standard
+        // places.  Perform an exception lookup in the caller and
+        // dispatch to the handler if found.  Otherwise unwind and
+        // dispatch to the callers exception handler.
+
+        const Register thread = TREG;
+        const Register exception_oop = V0;
+        const Register exception_pc = V1;
+
+        // load pending exception oop into eax
+       // __ movl(exception_oop, Address(thread, Thread::pending_exception_offset()));
+        __ lw(exception_oop, thread, in_bytes(Thread::pending_exception_offset()));
+        // clear pending exception
+        //__ movl(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
+        __ sw(ZERO,thread, in_bytes(Thread::pending_exception_offset()));
+
+        // load issuing PC (the return address for this stub) into edx
+        //__ movl(exception_pc, Address(ebp, 1*BytesPerWord));
+        __ lw(exception_pc, FP, 1*BytesPerWord);
+
+        // make sure that the vm_results are cleared (may be unnecessary)
+        //__ movl(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
+        __ sw(ZERO,Address(thread, in_bytes(JavaThread::vm_result_offset())));
+        //__ movl(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
+        __ sw(ZERO,Address(thread, in_bytes(JavaThread::vm_result_2_offset())));
+
+        // verify that there is really a valid exception in eax
+        __ verify_not_null_oop(exception_oop);
+
+
+        oop_maps = new OopMapSet();
+        OopMap* oop_map = generate_oop_map(sasm, 1);
+        generate_handle_exception(sasm, oop_maps, oop_map);
+        __ stop("should not reach here");
+      }
+      break;
+	  
+    case new_instance_id:
+    case fast_new_instance_id:
+    case fast_new_instance_init_check_id:
+      {
+        // I use T4 as the klass register and V0 as the result register; MUST match NewInstanceStub::emit_code
+        Register klass = T4; // Incoming
+        Register obj   = V0; // Result
+
+        if (id == new_instance_id) {
+          __ set_info("new_instance", dont_gc_arguments);
+        } else if (id == fast_new_instance_id) {
+          __ set_info("fast new_instance", dont_gc_arguments);
+        } else {
+          assert(id == fast_new_instance_init_check_id, "bad StubID");
+          __ set_info("fast new_instance init check", dont_gc_arguments);
+        }
+
+        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) 
+             && UseTLAB && FastTLABRefill) {
+          Label slow_path;
+          Register obj_size = T0;
+          Register t1       = T2;
+          Register t2       = T3;
+          assert_different_registers(klass, obj, obj_size, t1, t2);
+          if (id == fast_new_instance_init_check_id) {
+            // make sure the klass is initialized
+            __ lw(AT, klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
+            __ move(t1, instanceKlass::fully_initialized);
+            __ bne(AT, t1, slow_path);
+            __ delayed()->nop();
+          }
+#ifdef ASSERT
+          // assert object can be fast path allocated
+          {
+            Label ok, not_ok;
+            __ lw(obj_size, klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+            __ blez(obj_size, not_ok);
+            __ delayed()->nop();
+            __ andi(t1 , obj_size, Klass::_lh_instance_slow_path_bit);
+            __ beq(t1, ZERO, ok);
+            __ bind(not_ok);
+            __ stop("assert(can be fast path allocated)");
+            __ should_not_reach_here();
+            __ bind(ok);
+          }
+#endif // ASSERT
+          // if we got here then the TLAB allocation failed, so try
+          // refilling the TLAB or allocating directly from eden.
+          
+          Label retry_tlab, try_eden;
+          __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy the klass register
+          
+          __ bind(retry_tlab);
+          
+          // get the instance size
+          __ lw(obj_size, klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes());
+          __ sll(obj_size, obj_size, LogHeapWordSize);
+          __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ verify_oop(obj);
+          __ jr(RA);
+          __ delayed()->nop();
+         
+          __ bind(try_eden);
+
+          // get the instance size  
+          __ lw(obj_size, klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes());
+          __ sll(obj_size, obj_size, LogHeapWordSize);
+          __ eden_allocate(obj, obj_size, 0, t1, t2, slow_path);
+          __ initialize_object(obj, klass, obj_size, 0, t1, t2);
+          __ verify_oop(obj);
+          __ jr(RA);
+          __ delayed()->nop();
+          
+          __ bind(slow_path);
+        }
+        __ enter();
+        OopMap* map = save_live_registers(sasm, 0);
+        int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_V0(sasm);
+        __ verify_oop(obj);
+        __ leave();
+        __ jr(RA);
+        __ delayed()->nop();
+        
+        // V0: new instance
+      }
+      break;
+
+
+#ifdef TIERED
+//FIXME: I have no idea which register to use
+   case counter_overflow_id:
+      {
+//        Register bci = eax;
+        Register bci = T5;
+        __ enter();
+        OopMap* map = save_live_registers(sasm, 0);
+        // Retrieve bci
+        __ lw(bci, Address(FP, 2*BytesPerWord));
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci);
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+        __ leave();
+        __ jr(RA); 
+        __ delayed()->nop(); 
+      }
+      break;
+#endif // TIERED
+
+
+
+   case new_type_array_id:
+   case new_object_array_id:
+      { 
+	// I use T2 as the length register, T4 as the klass register and V0 as the result register.
+	// MUST match NewTypeArrayStub::emit_code and NewObjectArrayStub::emit_code
+	Register length   = T2; // Incoming
+	Register klass    = T4; // Incoming
+	Register obj      = V0; // Result
+
+	if (id == new_type_array_id) {
+          __ set_info("new_type_array", dont_gc_arguments);
+	} else {
+          __ set_info("new_object_array", dont_gc_arguments);
+	}
+
+	if (UseTLAB && FastTLABRefill) {
+	  Register arr_size = T0;
+          Register t1       = T1; 
+          Register t2       = T3;
+          Label slow_path;
+          assert_different_registers(length, klass, obj, arr_size, t1, t2);
+
+          // check that array length is small enough for fast path
+          __ move(AT, C1_MacroAssembler::max_array_allocation_length);
+          __ slt(AT, AT, length);
+	  __ bne(AT, ZERO, slow_path);
+          __ delayed()->nop();
+
+	  // if we got here then the TLAB allocation failed, so try
+          // refilling the TLAB or allocating directly from eden.
+          Label retry_tlab, try_eden;
+          //T0,T1,T5,T8 have changed! 
+          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves the length and klass registers
+
+          __ bind(retry_tlab);
+
+          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
+          __ lw(t1, klass, klassOopDesc::header_size() * HeapWordSize 
+                           + Klass::layout_helper_offset_in_bytes());	 
+          __ srl(AT, t1, Klass::_lh_log2_element_size_shift);
+          __ andi(AT, AT, Klass::_lh_log2_element_size_mask);
+          __ sllv(arr_size, length, AT);
+          __ srl(AT, t1, Klass::_lh_header_size_shift);
+          __ andi(AT, AT, Klass::_lh_header_size_mask);
+          __ add(arr_size, AT, arr_size);
+          __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask);  // align up
+          __ andi(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
+
+ 
+          __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
+          __ initialize_header(obj, klass, length,t1,t2);
+          __ lbu(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize 
+                                    + Klass::layout_helper_offset_in_bytes() 
+                                    + (Klass::_lh_header_size_shift / BitsPerByte)));
+          __ andi(t1, t1, Klass::_lh_header_size_mask);
+          __ sub(arr_size, arr_size, t1);  // body length
+          __ add(t1, t1, obj);             // body start
+          __ initialize_body(t1, arr_size, 0, t2);
+          __ verify_oop(obj);
+          __ jr(RA);
+          __ delayed()->nop();
+
+          __ bind(try_eden);
+          // get the allocation size: (length << (layout_helper & 0x1F)) + header_size
+          __ lw(t1, klass, klassOopDesc::header_size() * HeapWordSize 
+                           + Klass::layout_helper_offset_in_bytes());
+          __ srl(AT, t1, Klass::_lh_log2_element_size_shift);
+          __ andi(AT, AT, Klass::_lh_log2_element_size_mask);
+          __ sllv(arr_size, length, AT);
+          __ srl(AT, t1, Klass::_lh_header_size_shift);
+          __ andi(AT, AT, Klass::_lh_header_size_mask);
+          __ add(arr_size, AT, arr_size);
+          __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask);  // align up
+          __ andi(arr_size, arr_size, ~MinObjAlignmentInBytesMask);
+
+
+          __ eden_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
+
+          __ initialize_header(obj, klass, length,t1,t2);
+          __ lbu(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize 
+                                    + Klass::layout_helper_offset_in_bytes() 
+                                    + (Klass::_lh_header_size_shift / BitsPerByte)));
+          __ andi(t1, t1, Klass::_lh_header_size_mask);
+          __ sub(arr_size, arr_size, t1);  // body length
+          __ add(t1, t1, obj);             // body start
+
+          __ initialize_body(t1, arr_size, 0, t2);
+          __ verify_oop(obj);
+          __ jr(RA);
+          __ delayed()->nop();
+          __ bind(slow_path);
+        }
+
+
+        __ enter();
+        OopMap* map = save_live_registers(sasm, 0);
+        int call_offset;
+        if (id == new_type_array_id) {
+          call_offset = __ call_RT(obj, noreg, 
+                                   CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+        } else {
+          call_offset = __ call_RT(obj, noreg, 
+				   CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+	}
+
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, map);
+	      //FIXME 
+        restore_live_registers_except_V0(sasm);
+        __ verify_oop(obj);
+        __ leave();	
+        __ jr(RA);
+        __ delayed()->nop();
+      }
+      break;
+
+	case new_multi_array_id:
+      { 
+	      StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
+	      // refer to c1_LIRGenerator_mips.cpp::do_NewMultiArray
+	      // V0: klass
+	      // T2: rank
+	      // T0: address of 1st dimension
+	      //__ call_RT(V0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3);
+	      //OopMap* map = save_live_registers(sasm, 4);
+	      OopMap* map = save_live_registers(sasm, 0);
+	      int call_offset = __ call_RT(A1, noreg, CAST_FROM_FN_PTR(address, new_multi_array), 
+			      V0,T2,T0);
+	      oop_maps = new OopMapSet();
+	      oop_maps->add_gc_map(call_offset, map);
+	      //FIXME 
+	      // 	restore_live_registers_except_eax(sasm);
+	      restore_live_registers_except_V0(sasm);
+	      // V0: new multi array
+	      __ verify_oop(V0);
+      }
+      break;
+
+		
+    case register_finalizer_id:
+      {
+	      __ set_info("register_finalizer", dont_gc_arguments);
+
+	      // The object is passed on the stack and we haven't pushed a
+	      // frame yet so it's one word away from the top of the stack.
+	      // __ movl(eax, Address(esp, 1 * BytesPerWord));
+	    //jerome_for_debug 
+	//      __ lw(V0, Address(SP, (-2)* BytesPerWord));
+	 //     __ move(AT, (int)&jerome1); 
+	  //    __ sw(V0, AT, 0); 
+	   // __ lw(V0, Address(SP,  (-1)* BytesPerWord));
+	    //  __ move(AT, (int)&jerome2); 
+	     // __ sw(V0, AT, 0); 
+	   // __ lw(V0, Address(SP, 0 * BytesPerWord));
+	    //  __ move(AT, (int)&jerome3); 
+	     // __ sw(V0, AT, 0); 
+	   // __ lw(V0, Address(SP, 1 * BytesPerWord));
+	    //  __ move(AT, (int)&jerome4); 
+	     // __ sw(V0, AT, 0); 
+	   // __ lw(V0, Address(SP, 2 * BytesPerWord));
+	    //  __ move(AT, (int)&jerome5); 
+	     // __ sw(V0, AT, 0); 
+	     // __ move(AT, (int)&jerome6); 
+	     // __ sw(T0, AT, 0); 
+
+//reference to LIRGenerator::do_RegisterFinalizer, call_runtime
+	      //__ lw(V0, Address(SP, 0 * BytesPerWord));
+	      __ move(V0, A0); 
+	      __ verify_oop(V0);
+	      // load the klass and check the has finalizer flag
+	      Label register_finalizer;
+	      // Register t = esi;
+	      Register t = T5;
+	      //__ movl(t, Address(eax, oopDesc::klass_offset_in_bytes()));
+	      __ lw(t, Address(V0, oopDesc::klass_offset_in_bytes()));
+	      //__ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+	      __ lw(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+	      //__ testl(t, JVM_ACC_HAS_FINALIZER);
+	      //__ jcc(Assembler::notZero, register_finalizer);
+	      __ move(AT, JVM_ACC_HAS_FINALIZER); 
+	      __ andr(AT, AT, t); 
+	    
+	      //__ andi(AT,AT, JVM_ACC_HAS_FINALIZER); 
+	      __ bne(AT,ZERO, register_finalizer);	
+	      __ delayed()->nop();	
+	      //__ ret(0);
+	      __ jr(RA); 
+	      __ delayed()->nop(); 
+	      __ bind(register_finalizer);
+	      __ enter();
+	     // OopMap* map = save_live_registers(sasm, 2 /*num_rt_args */);
+	      OopMap* map = save_live_registers(sasm, 0 /*num_rt_args */);
+
+	      //__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
+	                         //SharedRuntime::register_finalizer), eax);
+	      int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
+				      SharedRuntime::register_finalizer), V0);
+	      oop_maps = new OopMapSet();
+              oop_maps->add_gc_map(call_offset, map);
+
+	      // Now restore all the live registers
+	      restore_live_registers(sasm);
+
+	      __ leave();
+	      //__ ret(0);
+	      __ jr(RA);
+	      __ delayed()->nop();
+      }
+      break;
+
+//	case range_check_failed_id:
+	case throw_range_check_failed_id:
+      { StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
+	      oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, 
+				      throw_range_check_exception),true);
+      }
+      break;
+
+      case throw_index_exception_id:
+      { 
+	      // I use A1 as the index register, since it will be the first argument; see call_RT
+	      StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments);
+	      oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, 
+				      throw_index_exception), true);
+      }
+      break;
+
+	case throw_div0_exception_id:
+      { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments);
+	      oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, 
+				      throw_div0_exception), false);
+      }
+      break;
+
+	case throw_null_pointer_exception_id:
+      { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments);
+	      oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, 
+				      throw_null_pointer_exception),false);
+      }
+      break;
+
+        case handle_exception_nofpu_id:
+		save_fpu_registers = false;
+		 // fall through
+	case handle_exception_id:
+		{
+			StubFrame f(sasm, "handle_exception", dont_gc_arguments);
+			oop_maps = new OopMapSet();
+			//OopMap* oop_map = save_live_registers(sasm, 1, save_fpu_registers);
+			OopMap* oop_map = save_live_registers(sasm, 0, save_fpu_registers);
+			generate_handle_exception(sasm, oop_maps, oop_map, save_fpu_registers);
+		}
+		break;
+
+	case unwind_exception_id:
+		{ 
+			__ set_info("unwind_exception", dont_gc_arguments);
+
+			generate_unwind_exception(sasm);
+		}
+		break;
+
+
+	case throw_array_store_exception_id:
+		{ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments);
+			// tos + 0: link
+			//     + 1: return address
+			oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, 
+						throw_array_store_exception), false);
+		}
+		break;
+
+	case throw_class_cast_exception_id:
+		{ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments);
+			oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, 
+						throw_class_cast_exception), V0);
+		}
+		break;
+
+	case throw_incompatible_class_change_error_id:
+		{ 
+		StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments);
+		oop_maps = generate_exception_throw(sasm, 
+			CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+		}
+		break;
+
+	case slow_subtype_check_id:
+		{
+		// actually, we do not use it
+			// A0:klass_RInfo		sub
+			// A1:k->encoding() super
+			__ set_info("slow_subtype_check", dont_gc_arguments);
+			///        __ pushl(edi);
+			///        __ pushl(esi);
+			///        __ pushl(ecx);
+			///        __ pushl(eax);
+			///    __ movl(esi, Address(esp, (super_off - 1) * BytesPerWord)); // super
+			///    __ movl(eax, Address(esp, (sub_off   - 1) * BytesPerWord)); // sub
+			__ sw(T0, SP, (-1) * wordSize);
+			__ sw(T1, SP, (-2) * wordSize);
+			__ addiu(SP, SP, (-2) * wordSize);
+
+			///__ movl(edi,Address(esi,sizeof(oopDesc) 
+			//+ Klass::secondary_supers_offset_in_bytes()));
+			__ lw(AT, A0, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
+			///        __ movl(ecx,Address(edi,arrayOopDesc::length_offset_in_bytes()));
+			__ lw(T1, AT, arrayOopDesc::length_offset_in_bytes());
+			__ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+			Label miss, hit, loop;
+			///        __ repne_scan();
+			// T1: count, AT: pointer into the secondary-supers array, A1: the candidate super klass
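+			// Rough C-level sketch of the scan below (illustrative only, not part of the port):
+			//   while (count-- != 0) {
+			//     if (*array++ == super) goto hit;   // cache the result in A0 and return 1
+			//   }
+			//   goto miss;                           // return 0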
+			__ bind(loop);
+			__ beq(T1, ZERO, miss);
+			__ delayed()->lw(T0, AT, 0);
+			__ beq(T0, A1, hit);
+			__ delayed();
+			__ addiu(T1, T1, -1);
+			__ b(loop);
+			__ delayed();
+			__ addiu(AT, AT, 4);
+
+			__ bind(hit);
+			///__ movl(Address(esi,sizeof(oopDesc) 
+			//+ Klass::secondary_super_cache_offset_in_bytes()), eax);
+			///__ movl(Address(esp, (super_off   - 1) * BytesPerWord), 1); // result
+			__ sw(A1, A0, sizeof(oopDesc) 
+					+ Klass::secondary_super_cache_offset_in_bytes());
+			__ addiu(V0, ZERO, 1);
+			__ addiu(SP, SP, 2 * wordSize);
+			__ lw(T0, SP, (-1) * wordSize);
+			__ lw(T1, SP, (-2) * wordSize);
+			__ jr(RA);
+			__ delayed()->nop();
+
+
+			__ bind(miss);
+			/// __ movl(Address(esp, (super_off   - 1) * BytesPerWord), 0); // result
+			__ move(V0, ZERO);
+			__ addiu(SP, SP, 2 * wordSize);
+			__ lw(T0, SP, (-1) * wordSize);
+			__ lw(T1, SP, (-2) * wordSize);
+			__ jr(RA);
+			__ delayed()->nop();
+		}
+		break;
+
+        case monitorenter_nofpu_id:
+              save_fpu_registers = false;
+              // fall through
+
+	case monitorenter_id:
+	      {     /* 
+		       StubFrame f(sasm, "monitorenter", dont_gc_arguments, V0, T6);
+		      // V0: object
+		      // T6: lock address
+		      __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), V0, T6);
+		      */	
+		      StubFrame f(sasm, "monitorenter", dont_gc_arguments);
+		   //   OopMap* map = save_live_registers(sasm, 3, save_fpu_registers);
+		      OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
+
+		      //f.load_argument(1, eax); // eax: object
+		      f.load_argument(1, V0); // V0: object
+		      //f.load_argument(0, ebx); // ebx: lock address
+		      f.load_argument(0, T6); // T6: lock address
+		      int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
+					      monitorenter), V0, T6);
+
+		      oop_maps = new OopMapSet();
+		      oop_maps->add_gc_map(call_offset, map);
+		      restore_live_registers(sasm, save_fpu_registers);
+	      }
+	      break;
+
+	case monitorexit_nofpu_id:
+	      save_fpu_registers = false;
+	      // fall through
+	case monitorexit_id:
+	      { 
+		      StubFrame f(sasm, "monitorexit", dont_gc_arguments);
+		     // OopMap* map = save_live_registers(sasm, 2, save_fpu_registers);
+		      OopMap* map = save_live_registers(sasm, 0, save_fpu_registers);
+
+		      //f.load_argument(0, eax); // eax: lock address
+		      f.load_argument(0, T6); // T6: lock address
+		      // note: really a leaf routine but must setup last java sp
+		      //       => use call_RT for now (speed can be improved by
+		      //       doing last java sp setup manually)
+		      //  int call_offset = __ call_RT(noreg, noreg, 
+		      //  CAST_FROM_FN_PTR(address, monitorexit), eax);
+		      int call_offset = __ call_RT(noreg, noreg, 
+				      CAST_FROM_FN_PTR(address, monitorexit), T6);
+		      oop_maps = new OopMapSet();
+		      oop_maps->add_gc_map(call_offset, map);
+		      restore_live_registers(sasm, save_fpu_registers);
+
+	      }
+	      break;
+	      //  case init_check_patching_id:
+	case access_field_patching_id:
+	      { 
+		      StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
+		      // we should set up register map
+		      oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, 
+					      access_field_patching));
+	      }
+	      break;
+
+	case load_klass_patching_id:
+		{ 
+			StubFrame f(sasm, "load_klass_patching", dont_gc_arguments);
+			// we should set up register map
+			oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, 
+						move_klass_patching));
+		}
+		break;
+	case jvmti_exception_throw_id:
+		{ 
+			// V0: exception oop
+			// V1: exception pc
+			StubFrame f(sasm, "jvmti_exception_throw", dont_gc_arguments);
+			// Preserve all registers across this potentially blocking call
+			const int num_rt_args = 2;  // thread, exception oop
+			//OopMap* map = save_live_registers(sasm, num_rt_args);
+			OopMap* map = save_live_registers(sasm, 0);
+			int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, 
+						Runtime1::post_jvmti_exception_throw), V0);
+			oop_maps = new OopMapSet();
+//			oop_maps->add_gc_map(call_offset, true, map);
+			oop_maps->add_gc_map(call_offset,  map);
+			restore_live_registers(sasm);
+		}
+		break;
+	case dtrace_object_alloc_id:
+		{ // V0: object
+			StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
+			// we can't gc here so skip the oopmap but make sure that all
+			// the live registers get saved.
+			//save_live_registers(sasm, 1);
+			save_live_registers(sasm, 0);
+
+			//__ pushl(eax);
+			__ push_reg(V0);
+			__ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc),
+					relocInfo::runtime_call_type);
+			//__ popl(eax);
+			__ super_pop(V0);
+
+			restore_live_registers(sasm);
+		}
+		break;
+	case fpu2long_stub_id:
+	{
+			// FIXME: I have no idea how to port this yet; with no break here it falls
+			// through to the unimplemented_entry handler below.
+	}
+	default:
+		{ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
+			__ move(A1, (int)id);
+			__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A1);
+			__ should_not_reach_here();
+		}
+		break;
+	}
+	return oop_maps;
+}
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/c1_globals_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Sets the default values for platform dependent flags used by the client compiler.
+// (see c1_globals.hpp)
+//
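+// For example, define_pd_global(intx, CompileThreshold, 1500) below supplies the platform
+// default for the CompileThreshold flag declared in c1_globals.hpp; an explicit
+// -XX:CompileThreshold=... command-line option still overrides it.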
+
+#ifndef TIERED
+define_pd_global(bool, BackgroundCompilation,        true );
+define_pd_global(bool, UseTLAB,                      true );
+define_pd_global(bool, ResizeTLAB,                   true );
+define_pd_global(bool, InlineIntrinsics,             true );
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 false);
+define_pd_global(bool, UseOnStackReplacement,        true );
+define_pd_global(bool, TieredCompilation,            false);
+define_pd_global(intx, CompileThreshold,             1500 );
+define_pd_global(intx, Tier2CompileThreshold,        1500 );
+define_pd_global(intx, Tier3CompileThreshold,        2500 );
+define_pd_global(intx, Tier4CompileThreshold,        4500 );
+
+define_pd_global(intx, BackEdgeThreshold,            100000);
+define_pd_global(intx, Tier2BackEdgeThreshold,       100000);
+define_pd_global(intx, Tier3BackEdgeThreshold,       100000);
+define_pd_global(intx, Tier4BackEdgeThreshold,       100000);
+define_pd_global(intx, OnStackReplacePercentage,     933  );
+define_pd_global(intx, FreqInlineSize,               325  );
+define_pd_global(intx, NewRatio,                     12   );
+define_pd_global(intx, NewSizeThreadIncrease,        4*K  );
+define_pd_global(intx, UseSSE,                       0);
+define_pd_global(intx, InitialCodeCacheSize,         160*K);
+define_pd_global(intx, ReservedCodeCacheSize,        32*M );
+define_pd_global(bool, ProfileInterpreter,           false);
+define_pd_global(intx, CodeCacheExpansionSize,       32*K );
+define_pd_global(uintx,CodeCacheMinBlockLength,      1);
+define_pd_global(uintx, PermSize,                    12*M );
+define_pd_global(uintx, MaxPermSize,                 64*M );
+define_pd_global(bool, NeverActAsServerClassMachine, true);
+define_pd_global(uintx, DefaultMaxRAM,               1*G);
+define_pd_global(bool, CICompileOSR,                 true );//by_css
+#endif // TIERED
+define_pd_global(bool, UseTypeProfile,               false);
+define_pd_global(bool, RoundFPResults,               true );
+
+
+define_pd_global(bool, LIRFillDelaySlots,            false);
+define_pd_global(bool, OptimizeSinglePrecision,      true);
+define_pd_global(bool, CSEArrayLength,               false);
+define_pd_global(bool, TwoOperandLIRForm,            false);
+
+
+define_pd_global(intx, SafepointPollOffset, 256);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2002-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+private:
+  void pd_initialize() {}
+
+public:
+  void flush_bundle(bool start_new_bundle) {}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/copy_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2003-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#include "incls/_copy_pd.inline.hpp.incl"
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  juint* to = (juint*)tohw;
+  count *= HeapWordSize / BytesPerInt;
+  while (count-- > 0) {
+    *to++ = value;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
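+
+// Illustrative example (not part of the port): with HeapWordSize == 4 and BytesPerInt == 4,
+// pd_fill_to_words(tohw, 2, 0xdeadbeef) stores 0xdeadbeef into 2 consecutive juint slots;
+// on an LP64 build (HeapWordSize == 8) the same call would fill 4 slots.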
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,47 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+ protected:
+
+#if 0
+  address generate_asm_interpreter_entry(bool synchronized);
+  address generate_native_entry(bool synchronized);
+  address generate_abstract_entry(void);
+  address generate_math_entry(AbstractInterpreter::MethodKind kind);
+  address generate_empty_entry(void);
+  address generate_accessor_entry(void);
+  void lock_method(void);
+  void generate_stack_overflow_check(void);
+
+  void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
+  void generate_counter_overflow(Label* do_continue);
+#endif
+
+  void generate_more_monitors();
+  void generate_deopt_handling();
+  address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only
+  void generate_compute_interpreter_state(const Register state,
+                                          const Register prev_state,
+                                          const Register sender_sp,
+                                          bool native); // C++ interpreter only
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2007-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010-xxx Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_cppInterpreter_mips.cpp.incl"
+
+#ifdef CC_INTERP
+
+// Routine exists to make tracebacks look decent in debugger
+// while "shadow" interpreter frames are on stack. It is also
+// used to distinguish interpreter frames.
+
+extern "C" void RecursiveInterpreterActivation(interpreterState istate) {
+  ShouldNotReachHere();
+}
+
+bool CppInterpreter::contains(address pc) {
+  Unimplemented();
+}
+
+#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name))
+#define __ _masm->
+
+Label frame_manager_entry;
+Label fast_accessor_slow_entry_path;  // fast accessor methods need to be able to jmp to unsynchronized
+                                      // c++ interpreter entry point this holds that entry point label.
+
+static address unctrap_frame_manager_entry  = NULL;
+
+static address interpreter_return_address  = NULL;
+static address deopt_frame_manager_return_atos  = NULL;
+static address deopt_frame_manager_return_btos  = NULL;
+static address deopt_frame_manager_return_itos  = NULL;
+static address deopt_frame_manager_return_ltos  = NULL;
+static address deopt_frame_manager_return_ftos  = NULL;
+static address deopt_frame_manager_return_dtos  = NULL;
+static address deopt_frame_manager_return_vtos  = NULL;
+
+const Register prevState = G1_scratch;
+
+void InterpreterGenerator::save_native_result(void) {
+  Unimplemented();
+}
+
+void InterpreterGenerator::restore_native_result(void) {
+  Unimplemented();
+}
+
+// A result handler converts/unboxes a native call result into
+// a java interpreter/compiler result. The current frame is an
+// interpreter frame. The activation frame unwind code must be
+// consistent with that of TemplateTable::_return(...). In the
+// case of native methods, the caller's SP was not modified.
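+// (Sketch of what an eventual implementation would typically do, not part of this port: a
+// T_BOOLEAN handler normalizes the raw native return value to 0/1, and a T_OBJECT handler
+// resolves the returned JNI handle; here the generator is still Unimplemented().)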
+address CppInterpreterGenerator::generate_result_handler_for(BasicType type) {
+  Unimplemented();
+}
+
+address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) {
+  Unimplemented();
+}
+
+address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) {
+  Unimplemented();
+}
+
+address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) {
+  Unimplemented();
+}
+
+address CppInterpreter::return_entry(TosState state, int length) {
+  Unimplemented();
+}
+
+address CppInterpreter::deopt_entry(TosState state, int length) {
+  Unimplemented();
+}
+
+void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) {
+  Unimplemented();
+}
+
+address InterpreterGenerator::generate_empty_entry(void) {
+  Unimplemented();
+}
+
+address InterpreterGenerator::generate_accessor_entry(void) {
+  Unimplemented();
+}
+
+address InterpreterGenerator::generate_native_entry(bool synchronized) {
+  Unimplemented();
+}
+
+void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state,
+                                                              const Register prev_state,
+                                                              bool native) {
+  Unimplemented();
+}
+
+void InterpreterGenerator::lock_method(void) {
+  Unimplemented();
+}
+
+void CppInterpreterGenerator::generate_deopt_handling() {
+  Unimplemented();
+}
+
+void CppInterpreterGenerator::generate_more_monitors() {
+  Unimplemented();
+}
+
+
+static address interpreter_frame_manager = NULL;
+
+void CppInterpreterGenerator::adjust_callers_stack(Register args) {
+  Unimplemented();
+}
+
+address InterpreterGenerator::generate_normal_entry(bool synchronized) {
+  Unimplemented();
+}
+
+InterpreterGenerator::InterpreterGenerator(StubQueue* code)
+ : CppInterpreterGenerator(code) {
+  Unimplemented();
+}
+
+
+static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) {
+  Unimplemented();
+}
+
+int AbstractInterpreter::size_top_interpreter_activation(methodOop method) {
+  Unimplemented();
+}
+
+void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill,
+                                           frame* caller,
+                                           frame* current,
+                                           methodOop method,
+                                           intptr_t* locals,
+                                           intptr_t* stack,
+                                           intptr_t* stack_base,
+                                           intptr_t* monitor_base,
+                                           intptr_t* frame_bottom,
+                                           bool is_top_frame
+                                           )
+{
+  Unimplemented();
+}
+
+void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) {
+  Unimplemented();
+}
+
+
+int AbstractInterpreter::layout_activation(methodOop method,
+                                           int tempcount, // Number of slots on java expression stack in use
+                                           int popframe_extra_args,
+                                           int moncount,  // Number of active monitors
+                                           int callee_param_size,
+                                           int callee_locals_size,
+                                           frame* caller,
+                                           frame* interpreter_frame,
+                                           bool is_top_frame) {
+  Unimplemented();
+}
+
+#endif // CC_INTERP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2002 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+  // Size of interpreter code.  Increase if too small.  Interpreter will
+  // fail with a guarantee ("not enough space for interpreter generation");
+  // if too small.
+  // Run with +PrintInterpreter to get the VM to print out the size.
+  // Max size with JVMTI and TaggedStackInterpreter
+
+  // QQQ this is probably way too large for the C++ interpreter
+
+#ifdef _LP64
+  // The sethi() instruction generates lots more instructions when shell
+  // stack limit is unlimited, so that's why this is much bigger.
+  const static int InterpreterCodeSize = 210 * K;
+#else
+  const static int InterpreterCodeSize = 180 * K;
+#endif
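+
+  // Example (sketch; PrintInterpreter is a notproduct flag, so a debug build is assumed):
+  //   java -XX:+PrintInterpreter -version
+  // prints the generated interpreter code and its total size, which is what must stay below
+  // InterpreterCodeSize.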
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/debug_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,45 @@
+/*
+ * Copyright 1999-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_debug_mips.cpp.incl"
+
+#ifndef PRODUCT
+
+void pd_ps(frame f) {
+  intptr_t* sp = f.sp();
+  intptr_t* prev_sp = sp - 1;
+  intptr_t *pc = NULL;
+  intptr_t *next_pc = NULL;
+  int count = 0;
+  tty->print("register window backtrace from %#x:\n", sp);
+}
+
+// This function is used to add platform specific info
+// to the error reporting code.
+
+void pd_obfuscate_location(char *buf,int buflen) {}
+
+#endif // PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2002 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_depChecker_mips.cpp.incl"
+
+// Nothing to do on mips
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2002 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Nothing to do on Loongson
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/disassembler_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,559 @@
+#ifdef USE_PRAGMA_IDENT_SRC
+#pragma ident "@(#)disassembler_mips.cpp	1.35 03/12/23 16:36:14 JVM"
+#endif
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+// by yjl 6/21/2005
+// FIXME: ugly code here; it always loads a dll/so to do the actual work, and it does not work
+// in product builds. Change it in the future.
+// 1/2, 07, jerome
+# include "incls/_precompiled.incl"
+# include "incls/_disassembler_mips.cpp.incl"
+
+//CHANGE_ME BY YJL
+#ifndef PRODUCT
+
+class mips32_env : public DisassemblerEnv {
+ private:
+  nmethod*      code;
+  outputStream* output;
+ public:
+  mips32_env(nmethod* rcode, outputStream* routput) {
+    code   = rcode;
+    output = routput;
+  }
+  void print_label(intptr_t value);
+  void print_raw(char* str) { output->print_raw(str); }
+  void print(char* format, ...);
+  char* string_for_offset(intptr_t value);
+  char* string_for_constant(unsigned char* pc, intptr_t value, int is_decimal);
+};
+
+
+void mips32_env::print_label(intptr_t value) {
+  if (!Universe::is_fully_initialized()) {
+    output->print(INTPTR_FORMAT, value);
+    return;
+  }
+  address adr = (address) value;
+  if (StubRoutines::contains(adr)) {
+    StubCodeDesc* desc = StubCodeDesc::desc_for(adr);
+    const char * desc_name = "unknown stub";
+    if (desc != NULL) {
+      desc_name = desc->name();
+    }
+    output->print("Stub::%s", desc_name);
+    if (WizardMode) output->print(" " INTPTR_FORMAT, value);
+  } else {
+    output->print(INTPTR_FORMAT, value); 
+  }
+}
+
+void mips32_env::print(char* format, ...) {
+  va_list ap;
+  va_start(ap, format);
+  output->vprint(format, ap);
+  va_end(ap);
+}
+
+char* mips32_env::string_for_offset(intptr_t value) {
+  stringStream st;
+  if (!Universe::is_fully_initialized()) {
+    st.print("%d", value);
+    return st.as_string();
+  }
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  BarrierSet::Name bsn = bs->kind();
+
+  if (bs->kind() == BarrierSet::CardTableModRef && (jbyte*) value == ((CardTableModRefBS*)(bs))->byte_map_base) {
+    st.print("word_map_base");
+  } else {
+    st.print("%d", value);
+  }
+  return st.as_string();
+}
+
+char* mips32_env::string_for_constant(unsigned char* pc, intptr_t value, int is_decimal) {
+  stringStream st;
+  oop obj = NULL;
+#ifndef CORE
+  if (code && (obj = code->embeddedOop_at(pc))!=NULL) {
+    obj->print_value_on(&st);
+  } else 
+#endif
+  {
+    if (is_decimal == 1) {
+      st.print("%d", value);
+    } else {
+      st.print("0x%lx", value);
+    }
+  }
+  return st.as_string();
+}
+
+#define PRINT_ORRI(OP) \
+	env->print("%s %s, %s, 0x%x\n", OP, as_Register(Assembler::rt(insn))->name(), \
+			as_Register(Assembler::rs(insn))->name(), \
+			(short)Assembler::low16(insn) );
+
+#define PRINT_ORRL(OP) \
+	env->print("%s %s, %s, ", OP, as_Register(Assembler::rs(insn))->name(), \
+			as_Register(Assembler::rt(insn))->name()); \
+	env->print_label( (int)start + 4 + ((short)Assembler::low16(insn)<<2) ); \
+	env->print("\n");
+
+#define PRINT_J(OP) \
+	env->print((char*)OP); \
+	env->print_label( ( ( (int)start + 4 ) & 0xc0000000 ) | ( Assembler::low26(insn) << 2 ) ); \
+	env->print("\n");
+
+#define PRINT_ORSL(OP) \
+	env->print("%s %s, ", OP, as_Register(Assembler::rs(insn))->name()); \
+	env->print_label( (int)start + 4 + (short)Assembler::low16(insn) ); \
+	env->print("\n");
+
+#define PRINT_OROB(OP) \
+	env->print("%s %s, 0x%x(%s)\n", OP, as_Register(Assembler::rt(insn))->name(), \
+			(short)Assembler::low16(insn), \
+			as_Register(Assembler::rs(insn))->name() );
+
+#define PRINT_OFOB(OP) \
+	env->print("%s %s, 0x%x(%s)\n", OP, as_FloatRegister(Assembler::rt(insn))->name(), \
+			(short)Assembler::low16(insn), \
+			as_Register(Assembler::rs(insn))->name() );
+
+
+#define PRINT_ORRS(OP) \
+	env->print("%s %s, %s, %d\n", OP, as_Register(Assembler::rd(insn))->name(), \
+			as_Register(Assembler::rt(insn))->name(), \
+			Assembler::sa(insn) );
+
+#define PRINT_ORRR(OP) \
+	env->print("%s %s, %s, %s\n", OP, as_Register(Assembler::rd(insn))->name(), \
+			as_Register(Assembler::rs(insn))->name(), \
+			as_Register(Assembler::rt(insn))->name() );
+
+#define PRINT_ORRR_2(OP) \
+	env->print("%s %s, %s, %s\n", OP, as_Register(Assembler::rd(insn))->name(), \
+			as_Register(Assembler::rt(insn))->name(), \
+			as_Register(Assembler::rs(insn))->name() );
+
+#define PRINT_ORS(OP) \
+	env->print("%s %s\n", OP, as_Register(Assembler::rs(insn))->name());
+
+#define PRINT_ORD(OP) \
+	env->print("%s %s\n", OP, as_Register(Assembler::rd(insn))->name());
+
+#define PRINT_ORR(OP) \
+	env->print("%s %s, %s\n", OP, as_Register(Assembler::rs(insn))->name(), \
+			as_Register(Assembler::rt(insn))->name());
+
+#define PRINT_ORR_2(OP) \
+	env->print("%s %s, %s\n", OP, as_Register(Assembler::rt(insn))->name(), \
+			as_Register(Assembler::rd(insn))->name());
+
+#define PRINT_FLOAT(OP) \
+	env->print("%s.%s %s, %s, %s\n", OP, fmt, as_FloatRegister(Assembler::sa(insn))->name(), \
+			as_FloatRegister(Assembler::rd(insn))->name(),  \
+			as_FloatRegister(Assembler::rt(insn))->name() );
+
+#define PRINT_CVT(OP) \
+	env->print("%s.%s %s, %s\n", OP, fmt, as_FloatRegister(Assembler::sa(insn))->name(), \
+			 as_FloatRegister(Assembler::rd(insn))->name() );
+
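+// Illustrative expansion (exact register-name spelling is an assumption): for an encoded
+// `addiu t0, sp, 0x10`, PRINT_ORRI("addiu") prints roughly "addiu t0, sp, 0x10\n", taking
+// rt, rs and the sign-extended 16-bit immediate from the instruction word.
+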
+static const char* fmt_str(int fmt) {
+	switch(fmt) {
+	case Assembler::single_fmt:
+		return "s";
+	case Assembler::double_fmt:
+		return "d";
+	case Assembler::word_fmt:
+		return "w";
+	case Assembler::long_fmt:
+		return "l";
+	}
+
+	return "";
+}
+
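+// Decodes the single 32-bit instruction word at `start`, printing a textual form through `env`,
+// and returns start + 4 (MIPS instructions are fixed-width). Encodings that are not recognized
+// are printed as a raw hex word instead of asserting.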
+address Disassembler::decode_instruction(address start, DisassemblerEnv* env) {
+	int insn = *(int*)start;
+	int opcode = Assembler::opcode(insn);
+	int special;
+	const char *fmt;
+
+	switch(opcode) {
+	case Assembler::special_op:
+		special = Assembler::special(insn);
+		switch(special) {
+		case Assembler::sll_op:
+		case Assembler::srl_op:
+		case Assembler::sra_op:
+		case Assembler::dsll_op:
+		case Assembler::dsrl_op:
+		case Assembler::dsra_op:
+		case Assembler::dsll32_op:
+		case Assembler::dsrl32_op:
+		case Assembler::dsra32_op:
+			PRINT_ORRS(Assembler::special_name[special]);
+			break;
+			
+		case Assembler::sllv_op:
+		case Assembler::srlv_op:
+		case Assembler::srav_op:
+		case Assembler::dsllv_op:
+		case Assembler::dsrlv_op:
+		case Assembler::dsrav_op:
+			PRINT_ORRR_2(Assembler::special_name[special]);
+			break;
+
+		case Assembler::jr_op:
+		case Assembler::jalr_op:
+		case Assembler::mthi_op:
+		case Assembler::mtlo_op:
+			PRINT_ORS(Assembler::special_name[special]);
+			break;
+			
+		case Assembler::syscall_op:
+		case Assembler::break_op:
+			env->print("%s 0x%x\n", Assembler::special_name[special], bitfield(insn, 6, 20)>>10);
+			break;
+					
+		case Assembler::sync_op:
+			env->print("sync\n");
+			break;
+
+		case Assembler::mfhi_op:
+		case Assembler::mflo_op:
+			PRINT_ORD(Assembler::special_name[special]);
+			break;
+
+		case Assembler::mult_op:
+		case Assembler::multu_op:
+		case Assembler::div_op:
+		case Assembler::divu_op:
+		case Assembler::dmult_op:
+		case Assembler::dmultu_op:
+		case Assembler::ddiv_op:
+		case Assembler::ddivu_op:
+			PRINT_ORR(Assembler::special_name[special]);
+			break;
+
+		case Assembler::add_op:
+		case Assembler::addu_op:
+		case Assembler::sub_op:
+		case Assembler::subu_op:
+		case Assembler::and_op:
+		case Assembler::or_op:
+		case Assembler::xor_op:
+		case Assembler::nor_op:
+		case Assembler::slt_op:
+		case Assembler::sltu_op:
+		case Assembler::dadd_op:
+		case Assembler::daddu_op:
+		case Assembler::dsub_op:
+		case Assembler::dsubu_op:
+			PRINT_ORRR(Assembler::special_name[special]);
+			break;
+
+		case Assembler::tge_op:
+		case Assembler::tgeu_op:
+		case Assembler::tlt_op:
+		case Assembler::tltu_op:
+		case Assembler::teq_op:
+		case Assembler::tne_op:
+			env->print("%s 0x%x, %s, %s\n", Assembler::special_name[special], bitfield(insn, 6, 10), 
+					as_Register(Assembler::rs(insn))->name(),
+					as_Register(Assembler::rt(insn))->name() );
+			break;
+
+		default:
+			//Unimplemented();
+			env->print("0x%x\n", insn);
+		}
+		break;
+		
+	case Assembler::regimm_op:
+		special	= Assembler::rt(insn);
+		
+		switch(special) {
+		case Assembler::bltz_op:
+		case Assembler::bgez_op:
+		case Assembler::bltzl_op:
+		case Assembler::bgezl_op:
+		case Assembler::bltzal_op:
+		case Assembler::bgezal_op:
+		case Assembler::bltzall_op:
+		case Assembler::bgezall_op:
+			env->print("%s %s, ", Assembler::regimm_name[special], as_Register(Assembler::rs(insn))->name());
+			env->print_label( (int)start + 4 + (short)Assembler::low16(insn) );
+			env->print("\n");
+			break;
+
+		case Assembler::tgei_op:
+		case Assembler::tgeiu_op:
+		case Assembler::tlti_op:
+		case Assembler::tltiu_op:
+		case Assembler::teqi_op:
+		case Assembler::tnei_op:
+			env->print("%s %s, %d\n", Assembler::regimm_name[special], 
+					as_Register(Assembler::rs(insn))->name(),
+					(short)Assembler::low16(insn));
+			break;
+
+		default:
+			//Unimplemented();
+			env->print("0x%x\n", insn);
+		}
+		break;
+
+	case Assembler::j_op:
+	case Assembler::jal_op:
+		PRINT_J(Assembler::ops_name[opcode]);
+		break;
+		
+	case Assembler::beq_op:
+	case Assembler::bne_op:
+	case Assembler::blez_op:
+	case Assembler::bgtz_op:
+		PRINT_ORRL(Assembler::ops_name[opcode]);
+		break;
+		
+	case Assembler::addi_op:
+	case Assembler::addiu_op:
+	case Assembler::slti_op:
+	case Assembler::sltiu_op:
+	case Assembler::ori_op:
+	case Assembler::andi_op:
+	case Assembler::xori_op:
+	case Assembler::daddi_op:
+	case Assembler::daddiu_op:
+		PRINT_ORRI(Assembler::ops_name[opcode]);
+		break;
+		
+	case Assembler::lui_op:
+		env->print("lui %s, 0x%x\n", as_Register(Assembler::rt(insn))->name(), (short)Assembler::low16(insn) );
+		break;
+
+	case Assembler::cop1_op:
+		special = Assembler::rs(insn);
+		switch(special) {
+		case Assembler::mf_op:
+			PRINT_ORR_2("mfc1");
+			break;
+		case Assembler::mt_op:
+			PRINT_ORR_2("mtc1");
+			break;
+		case Assembler::cf_op:
+			PRINT_ORR_2("cfc1");
+			break;
+		case Assembler::ct_op:
+			PRINT_ORR_2("ctc1");
+			break;
+		case Assembler::dmf_op:
+			PRINT_ORR_2("dmfc1");
+			break;
+		case Assembler::dmt_op:
+			PRINT_ORR_2("dmtc1");
+			break;
+		
+		case Assembler::bc_op:
+			special = Assembler::rt(insn);
+			switch(special) {
+			case Assembler::bcf_op:
+				env->print("bc1f ");
+				env->print_label( (int)start + 4 + (short)Assembler::low16(insn) );
+				env->print("\n");
+				break;
+			case Assembler::bcfl_op:
+				env->print("bc1fl ");
+				env->print_label( (int)start + 4 + (short)Assembler::low16(insn) );
+				env->print("\n");
+				break;
+			case Assembler::bct_op:
+				env->print("bc1t ");
+				env->print_label( (int)start + 4 + (short)Assembler::low16(insn) );
+				env->print("\n");
+				break;
+			case Assembler::bctl_op:
+				env->print("bc1tl ");
+				env->print_label( (int)start + 4 + (short)Assembler::low16(insn) );
+				env->print("\n");
+				break;
+			default:
+				//Unimplemented();
+			env->print("0x%x\n", insn);
+			}
+			break;
+		case Assembler::single_fmt:
+		case Assembler::double_fmt:
+		case Assembler::word_fmt:
+		case Assembler::long_fmt:
+			fmt = fmt_str(special);	
+			special = Assembler::special(insn);
+			switch(special) {
+			case Assembler::fadd_op:
+			case Assembler::fsub_op:
+			case Assembler::fmul_op:
+			case Assembler::fdiv_op:
+			case Assembler::fsqrt_op:
+			case Assembler::fabs_op:
+			case Assembler::fmov_op:
+			case Assembler::fneg_op:
+			case Assembler::froundl_op:
+			case Assembler::ftruncl_op:
+			case Assembler::fceill_op:
+			case Assembler::ffloorl_op:
+			case Assembler::froundw_op:
+			case Assembler::ftruncw_op:
+			case Assembler::fceilw_op:
+			case Assembler::ffloorw_op:
+				PRINT_FLOAT(Assembler::float_name[special]);
+				break;
+
+			case Assembler::fcvts_op:
+				PRINT_CVT("cvt.s");
+				break;
+			case Assembler::fcvtd_op:
+				PRINT_CVT("cvt.d");
+				break;
+			case Assembler::fcvtw_op:
+				PRINT_CVT("cvt.w");
+				break;
+			case Assembler::fcvtl_op:
+				PRINT_CVT("cvt.l");
+				break;
+			default:
+				//tty->print_cr("0x%x(%x)", insn, opcode);
+				//Unimplemented();
+			env->print("0x%x\n", insn);
+			}
+		}
+		break;
+		
+	case Assembler::beql_op:
+	case Assembler::bnel_op:
+	case Assembler::blezl_op:
+	case Assembler::bgtzl_op:
+		PRINT_ORRL(Assembler::ops_name[opcode]);
+		break;
+
+	case Assembler::ldl_op:
+	case Assembler::ldr_op:
+	case Assembler::lb_op:
+	case Assembler::lh_op:
+	case Assembler::lwl_op:
+	case Assembler::lw_op:
+	case Assembler::lbu_op:
+	case Assembler::lhu_op:
+	case Assembler::lwr_op:
+	case Assembler::lwu_op:
+	case Assembler::sb_op:
+	case Assembler::sh_op:
+	case Assembler::swl_op:
+	case Assembler::sw_op:
+	case Assembler::sdl_op:
+	case Assembler::sdr_op:
+	case Assembler::swr_op:
+	case Assembler::ll_op:
+	case Assembler::lld_op:
+	case Assembler::ld_op:
+	case Assembler::sc_op:
+	case Assembler::scd_op:
+	case Assembler::sd_op:
+		PRINT_OROB(Assembler::ops_name[opcode]);
+		break;
+	case Assembler::sdc1_op:
+	case Assembler::ldc1_op:
+	case Assembler::lwc1_op:
+	case Assembler::swc1_op:
+		PRINT_OFOB(Assembler::ops_name[opcode]);
+		break;
+
+	default:
+		//tty->print_cr("0x%x(%x)", insn, opcode);
+		//Unimplemented();
+			env->print("0x%x\n", insn);
+	}
+
+	return start+4;
+}
+
+
+void Disassembler::decode(CodeBlob* cb, outputStream* st) {
+#ifndef CORE
+  st = st ? st : tty;
+  st->print_cr("Decoding CodeBlob " INTPTR_FORMAT, cb);
+  decode(cb->instructions_begin(), cb->instructions_end(), st);
+#endif
+}
+
+
+void Disassembler::decode(u_char* begin, u_char* end, outputStream* st) {
+  st = st ? st : tty;
+
+  const int show_bytes = false; // for disassembler debugging
+
+  mips32_env env(NULL, st);
+  unsigned char*  p = (unsigned char*) begin;
+  CodeBlob* cb = CodeCache::find_blob_unsafe(begin);
+  while (p < (unsigned char*) end) {
+    if (cb != NULL) {
+      cb->print_block_comment(st, (intptr_t)(p - cb->instructions_begin()));
+    }
+
+    unsigned char* p0 = p;
+    st->print("   "INTPTR_FORMAT ": ", p);
+    p = decode_instruction(p, &env);
+    if (show_bytes) {
+      st->print("\t\t\t");
+      while (p0 < p) st->print("%x ", *p0++);
+    }
+    st->cr();
+  }
+}
+
+
+void Disassembler::decode(nmethod* nm, outputStream* st) {
+#ifndef CORE
+  st = st ? st : tty;
+
+  st->print_cr("Decoding compiled method " INTPTR_FORMAT ":", nm);
+  st->print("Code:");
+  st->cr();
+  
+  mips32_env env(nm, st);
+#ifdef COMPILER1
+  unsigned char* p = nm->code_begin();
+#else
+  unsigned char* p = nm->instructions_begin();
+#endif
+  unsigned char* end = nm->instructions_end();
+  while (p < end) {
+    if (p == nm->entry_point())             st->print_cr("[Entry Point]");
+    if (p == nm->verified_entry_point())    st->print_cr("[Verified Entry Point]");
+    if (p == nm->exception_begin())         st->print_cr("[Exception Handler]");
+    if (p == nm->stub_begin())              st->print_cr("[Stub Code]");
+    if (p == nm->consts_begin())            st->print_cr("[Constants]");
+    nm->print_block_comment(st, (intptr_t)(p - nm->instructions_begin()));
+    unsigned char* p0 = p;
+    st->print("  " INTPTR_FORMAT ": ", p);
+    p = decode_instruction(p, &env);
+    nm->print_code_comment_on(st, 40, p0, p);
+    st->cr();
+    // Output pc bucket ticks if we have any
+    address bucket_pc = FlatProfiler::bucket_start_for(p);
+    if (bucket_pc != NULL && bucket_pc > p0 && bucket_pc <= p) {
+      int bucket_count = FlatProfiler::bucket_count_for(bucket_pc);
+      tty->print_cr("[%d]", bucket_count);
+    } 
+  }
+#endif
+}
+
+#endif // PRODUCT
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,24 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)disassembler_mips.hpp	1.16 03/12/23 16:36:15 JVM"
+#endif
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+// by yjl 6/21/2005
+
+// The disassembler prints out mips32 code annotated
+// with Java specific information.
+
+class Disassembler {
+ private:
+  // decodes one instruction and return the start of the next instruction.
+  static address decode_instruction(address start, DisassemblerEnv* env);
+ public:
+	//CHANGE_ME BY YJL
+  static void decode(CodeBlob *cb,               outputStream* st = NULL) PRODUCT_RETURN;
+  static void decode(nmethod* nm,                outputStream* st = NULL) PRODUCT_RETURN;
+  static void decode(u_char* begin, u_char* end, outputStream* st = NULL) PRODUCT_RETURN;
+};
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/dump_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2004-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_dump_mips.cpp.incl"
+
+
+
+// Generate the self-patching vtable method:
+//
+// This method will be called (as any other Klass virtual method) with
+// the Klass itself as the first argument.  Example:
+//
+//      oop obj;
+//      int size = obj->klass()->klass_part()->oop_size(this);
+//
+// for which the virtual method call is Klass::oop_size();
+//
+// The dummy method is called with the Klass object as the first
+// operand, and an object as the second argument.
+//
+
+//=====================================================================
+
+// All of the dummy methods in the vtable are essentially identical,
+// differing only by an ordinal constant, and they bear no relationship
+// to the original method which the caller intended. Also, there needs
+// to be 'vtbl_list_size' instances of the vtable in order to
+// differentiate between the 'vtable_list_size' original Klass objects.
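+//
+// In the MIPS code below, each dummy method loads an identifier of the form
+// (table_index << 8) + method_index into V0 and branches to a shared tail (common_code),
+// which recovers the real vtable address from vtbl_list, patches the object's vtable
+// pointer, and tail-calls the real method through T9.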
+
+#define __ masm->
+
+void CompactingPermGenGen::generate_vtable_methods(void** vtbl_list,
+                                                   void** vtable,
+                                                   char** md_top,
+                                                   char* md_end,
+                                                   char** mc_top,
+                                                   char* mc_end) {
+
+  intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);
+  *(intptr_t *)(*md_top) = vtable_bytes;
+  *md_top += sizeof(intptr_t);
+  void** dummy_vtable = (void**)*md_top;
+  *vtable = dummy_vtable;
+  *md_top += vtable_bytes;
+
+  // Get ready to generate dummy methods.
+
+  CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+
+  Label common_code;
+  for (int i = 0; i < vtbl_list_size; ++i) {
+    for (int j = 0; j < num_virtuals; ++j) {
+      dummy_vtable[num_virtuals * i + j] = (void*)masm->pc();
+
+      // Load V0 with a value indicating vtable/offset pair.
+      // -- bits[ 7..0]  (8 bits) which virtual method in table?
+      // -- bits[12..8]  (5 bits) which virtual method table?
+      // -- must fit in 13-bit instruction immediate field.
+      __ move(V0, (i << 8) + j);
+      __ b(common_code);
+      __ delayed()->nop(); 
+    }
+  }
+
+  __ bind(common_code);
+
+  __ srl(T9, V0, 8);		// isolate vtable identifier.
+  __ shl(T9, LogBytesPerWord);
+  __ move(AT, (int)vtbl_list);
+  __ add(T9, AT, T9);
+  __ lw(T9, T9, 0); 		// get correct vtable address.
+  __ sw(T9, A0, 0);		// update vtable pointer.
+
+  __ andi(V0, V0, 0x00ff);	// isolate vtable method index
+  __ shl(V0, LogBytesPerWord);
+  __ add(T9, T9, V0);		// address of real method pointer.
+  __ jr(T9);			// get real method pointer.
+  __ delayed()->nop();
+
+  __ flush();
+
+  *mc_top = (char*)__ pc();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/frame_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,431 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_frame_mips.cpp.incl"
+
+// Profiling/safepoint support
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() {
+}
+#endif
+
+
+// Profiling/safepoint support
+// for Profiling - acting on another frame. walks sender frames
+// if valid.
+// frame profile_find_Java_sender_frame(JavaThread *thread);
+
+bool frame::safe_for_sender(JavaThread *thread) {
+	address   sp = (address)_sp;
+	address   fp = (address)_fp;
+	bool sp_safe = (sp != NULL && 
+			(sp <= thread->stack_base()) &&
+			(sp >= thread->stack_base() - thread->stack_size()));
+	bool fp_safe = (fp != NULL && 
+			(fp <= thread->stack_base()) &&
+			(fp >= thread->stack_base() - thread->stack_size()));
+	if (sp_safe && fp_safe) {
+		CodeBlob *cb = CodeCache::find_blob_unsafe(_pc);
+		// First check if frame is complete and tester is reliable
+		// Unfortunately we can only check frame complete for runtime stubs and nmethod
+		// other generic buffer blobs are more problematic so we just assume they are
+		// ok. adapter blobs never have a frame complete and are never ok.
+		if (cb != NULL && !cb->is_frame_complete_at(_pc)) {
+			if (cb->is_nmethod() || cb->is_adapter_blob() || cb->is_runtime_stub()) {
+				return false;
+			}
+		}
+		return true;
+	}
+	// Note: fp == NULL is not really a prerequisite for this to be safe to
+	// walk for c2. However we've modified the code such that if we get
+	// a failure with fp != NULL that we then try with FP == NULL.
+	// This is basically to mimic what a last_frame would look like if
+	// c2 had generated it.
+	if (sp_safe && fp == NULL) {
+		CodeBlob *cb = CodeCache::find_blob_unsafe(_pc);
+		// frame must be complete if fp == NULL as fp == NULL is only sensible
+		// if we are looking at a nmethod and frame complete assures us of that.
+		if (cb != NULL && cb->is_frame_complete_at(_pc) && cb->is_compiled_by_c2()) {
+			return true;
+		}
+	}
+	return false;
+}
+
+
+void frame::patch_pc(Thread* thread, address pc) {
+	if (TracePcPatching) {
+		tty->print_cr("patch_pc at address  0x%x [0x%x -> 0x%x] ", &((address *)_sp)[-1], ((address *)_sp)[-1], pc);
+	}
+	((address *)_sp)[-1] = _pc = pc; 
+}
+
+bool frame::is_interpreted_frame() const  {
+	return Interpreter::contains(pc());
+}
+
+int frame::frame_size() const {
+	RegisterMap map(JavaThread::current(), false);
+	frame sender = this->sender(&map);
+	return sender.sp() - sp();
+}
+
+intptr_t* frame::entry_frame_argument_at(int offset) const {
+	// convert offset to index to deal with tsi
+	int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+	// Entry frame's arguments are always in relation to unextended_sp()
+	return &unextended_sp()[index];
+}
+
+// sender_sp
+#ifdef CC_INTERP
+intptr_t* frame::interpreter_frame_sender_sp() const {
+	assert(is_interpreted_frame(), "interpreted frame expected");
+	// QQQ why does this specialize method exist if frame::sender_sp() does same thing?
+	// seems odd and if we always know interpreted vs. non then sender_sp() is really
+	// doing too much work.
+	return get_interpreterState()->sender_sp();
+}
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+	return get_interpreterState()->monitor_base();
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+	return (BasicObjectLock*) get_interpreterState()->stack_base();
+}
+
+#else // CC_INTERP
+
+intptr_t* frame::interpreter_frame_sender_sp() const {
+	assert(is_interpreted_frame(), "interpreted frame expected");
+	return (intptr_t*) at(interpreter_frame_sender_sp_offset);
+}
+
+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
+	assert(is_interpreted_frame(), "interpreted frame expected");
+	int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp);
+}
+
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+	return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset);
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+	BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);
+	// make sure the pointer points inside the frame
+	assert((intptr_t) fp() >  (intptr_t) result, "result must <  than frame pointer");
+	assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer");
+	return result;
+}
+
+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
+	*((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;
+}
+
+// Used by template based interpreter deoptimization
+void frame::interpreter_frame_set_last_sp(intptr_t* sp) {
+	*((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp;
+}
+#endif // CC_INTERP
+
+frame frame::sender_for_entry_frame(RegisterMap* map) const {
+	assert(map != NULL, "map must be set");
+	// Java frame called from C; skip all C frames and return top C
+	// frame of that chunk as the sender
+	JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+	assert(!entry_frame_is_first(), "next Java fp must be non zero");
+	assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
+	map->clear();
+	assert(map->include_argument_oops(), "should be set by clear");
+	if (jfa->last_Java_pc() != NULL ) {
+		frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
+		return fr;
+	}
+	frame fr(jfa->last_Java_sp(), jfa->last_Java_fp());
+	return fr;
+}
+
+frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
+	// sp is the raw sp from the sender after adapter or interpreter extension
+	//intptr_t* sp = (intptr_t*) addr_at(sender_sp_offset);
+	jint* sp = (jint*) at(interpreter_frame_sender_sp_offset);
+
+	// This is the sp before any possible extension (adapter/locals).
+	//intptr_t* unextended_sp = interpreter_frame_sender_sp();
+
+	// The interpreter and compiler(s) always save EBP/RBP (FP here) in a known
+	// location on entry. We must record where that location is
+	// so that, if FP was live on callout from c2, we can find
+	// the saved copy no matter what it called.
+
+	// Since the interpreter always saves FP, if we record where it is then
+	// we don't have to save FP on every entry and exit to c2 compiled
+	// code; saving it on entry is enough.
+#ifdef COMPILER2
+	if (map->update_map()) {
+		map->set_location(FP->as_VMReg(), (address) addr_at(link_offset));
+	}
+#endif /* COMPILER2 */
+	//return frame(sp, unextended_sp, link(), sender_pc());
+	return frame(sp, link(), sender_pc());
+}
+
+
+//------------------------------sender_for_compiled_frame-----------------------
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+	assert(map != NULL, "map must be set");
+
+	const bool c1_compiled = _cb->is_compiled_by_c1();
+	// frame owned by optimizing compiler 
+	jint* sender_sp = NULL;
+	bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method();
+
+	assert(_cb->frame_size() >= 0, "must have non-negative frame size");
+	//FIXME: this may be wrong; does MIPS keep the return address and link address on the stack?
+
+	sender_sp = sp() + _cb->frame_size();
+#ifdef ASSERT
+	if (c1_compiled && native) {
+		assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size");
+	}
+#endif // ASSERT
+	// On Intel the return_address is always the word on the stack just below the sender's sp.
+	// In compiled frames fp points to the sender's fp, but in interpreter frames fp points to
+	// the return address, so getting the sender of a compiled frame is not the same as for an
+	// interpreter frame. We hard code this here temporarily.
+	// spark
+	address sender_pc = (address) *(sender_sp-1);
+
+	jint *saved_fp = (jint*)*(sender_sp - frame::sender_sp_offset);
+
+	if (map->update_map()) {
+		// Tell GC to use argument oopmaps for some runtime stubs that need it.
+		// For C1, the runtime stub might not have oop maps, so set this flag
+		// outside of update_register_map.
+
+		map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+		if (_cb->oop_maps() != NULL) {
+			OopMapSet::update_register_map(this, map);
+		}
+		// Since the prologue does the save and restore of FP there is no oopmap
+		// entry for it, so we must fill in its location as if there were one,
+		// since if our caller was compiled code there could be live jvm state in it.
+		//   map->set_location(ebp->as_VMReg(), (address) (sender_sp - frame::sender_sp_offset));
+		map->set_location(FP->as_VMReg(), (address) (sender_sp - frame::sender_sp_offset));
+	}
+	assert(sender_sp != sp(), "must have changed");
+	return frame(sender_sp, saved_fp, sender_pc);
+}
+
+frame frame::sender(RegisterMap* map) const {
+	// Default is we don't have to follow them. The sender_for_xxx() methods will
+	// update it accordingly.
+	map->set_include_argument_oops(false);
+
+	if (is_entry_frame())       return sender_for_entry_frame(map);
+	if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
+	assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
+	if (_cb != NULL) {
+		return sender_for_compiled_frame(map);
+	}
+	// Must be native-compiled frame, i.e. the marshaling code for native
+	// methods that exists in the core system.
+	return frame(sender_sp(), link(), sender_pc());
+}
+
+
+bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) {
+	assert(is_interpreted_frame(), "must be interpreter frame");
+	methodOop method = interpreter_frame_method();
+	// When unpacking an optimized frame the frame pointer is
+	// adjusted with:
+	int diff = (method->max_locals() - method->size_of_parameters()) *
+		Interpreter::stackElementWords();
+	printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0x%x\n", (int)(fp - diff));
+	return _fp == (fp - diff);
+}
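+// NOTE (illustrative, not part of the original port): for example, a method with
+// max_locals() == 5 and size_of_parameters() == 2 gives, with one-word stack
+// elements, diff == 3, so the interpreter fp is expected 3 words below (at a
+// lower address than) the fp recorded when the optimized frame was unpacked.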
+
+void frame::pd_gc_epilog() {
+	// nothing done here now
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+	// QQQ
+#ifdef CC_INTERP
+#else
+	assert(is_interpreted_frame(), "Not an interpreted frame");
+	// These are reasonable sanity checks
+	if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) {
+		return false;
+	}
+	if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) {
+		return false;
+	}
+	if (fp() + interpreter_frame_initial_sp_offset < sp()) {
+		return false;
+	}
+	// These are hacks to keep us out of trouble.
+	// The problem with these is that they mask other problems
+	if (fp() <= sp()) {        // this attempts to deal with unsigned comparison above
+		return false;
+	}
+
+	// do some validation of frame elements
+
+	// first the method
+
+	methodOop m = *interpreter_frame_method_addr();
+
+	// validate the method we'd find in this potential sender
+	if (!Universe::heap()->is_valid_method(m)) return false;
+
+	// stack frames shouldn't be much larger than max_stack elements
+
+	//if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) {
+	if (fp() - sp() > 4096) {  // stack frames shouldn't be large.
+		return false;
+	}
+
+	// validate bci/bcx
+
+	intptr_t  bcx    = interpreter_frame_bcx();
+	if (m->validate_bci_from_bcx(bcx) < 0) {
+		return false;
+	}
+
+	// validate constantPoolCacheOop
+
+	constantPoolCacheOop cp = *interpreter_frame_cache_addr();
+
+	if (cp == NULL ||
+			!Space::is_aligned(cp) ||
+			!Universe::heap()->is_permanent((void*)cp)) return false;
+
+	// validate locals
+
+	address locals =  (address) *interpreter_frame_locals_addr();
+
+	if (locals > thread->stack_base() || locals < (address) fp()) return false;
+
+	// We'd have to be pretty unlucky to be misled at this point
+
+#endif // CC_INTERP
+	return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+#ifdef CC_INTERP
+	// Needed for JVMTI. The result should always be in the interpreterState object
+	assert(false, "NYI");
+	interpreterState istate = get_interpreterState();
+#endif // CC_INTERP
+	assert(is_interpreted_frame(), "interpreted frame expected");
+	methodOop method = interpreter_frame_method();
+	BasicType type = method->result_type();
+
+	intptr_t* tos_addr;
+	if (method->is_native()) {
+		// Prior to calling into the runtime to report the method_exit the possible
+		// return value is pushed to the native stack. If the result is a jfloat/jdouble
+		// then ST0 is saved before EAX/EDX. See the note in generate_native_result
+		tos_addr = (intptr_t*)sp();
+		if (type == T_FLOAT || type == T_DOUBLE) {
+			// QQQ seems like this code is equivalent on the two platforms
+#ifdef AMD64
+			// This is times two because we do a push(ltos) after pushing XMM0
+			// and that takes two interpreter stack slots.
+			tos_addr += 2 * Interpreter::stackElementWords();
+#else
+			tos_addr += 2;
+#endif // AMD64
+		}
+	} else {
+		tos_addr = (intptr_t*)interpreter_frame_tos_address();
+	}
+
+	switch (type) {
+		case T_OBJECT  :
+		case T_ARRAY   : {
+					 oop obj;
+					 if (method->is_native()) {
+#ifdef CC_INTERP
+						 obj = istate->_oop_temp;
+#else
+						 obj = (oop) at(interpreter_frame_oop_temp_offset);
+#endif // CC_INTERP
+					 } else {
+						 oop* obj_p = (oop*)tos_addr;
+						 obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
+					 }
+					 assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+					 *oop_result = obj;
+					 break;
+				 }
+		case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
+		case T_BYTE    : value_result->b = *(jbyte*)tos_addr; break;
+		case T_CHAR    : value_result->c = *(jchar*)tos_addr; break;
+		case T_SHORT   : value_result->s = *(jshort*)tos_addr; break;
+		case T_INT     : value_result->i = *(jint*)tos_addr; break;
+		case T_LONG    : value_result->j = *(jlong*)tos_addr; break;
+		case T_FLOAT   : {
+#ifdef AMD64
+					 value_result->f = *(jfloat*)tos_addr;
+#else
+					 if (method->is_native()) {
+						 jdouble d = *(jdouble*)tos_addr;  // Result was in ST0 so need to convert to jfloat
+						 value_result->f = (jfloat)d;
+					 } else {
+						 value_result->f = *(jfloat*)tos_addr;
+					 }
+#endif // AMD64
+					 break;
+				 }
+		case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr; break;
+		case T_VOID    : /* Nothing to do */ break;
+		default        : ShouldNotReachHere();
+	}
+
+	return type;
+}
+
+
+intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+	int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+	return &interpreter_frame_tos_address()[index];
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/frame_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,221 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// A frame represents a physical stack frame (an activation).  Frames can be
+// C or Java frames, and the Java frames can be interpreted or compiled.
+// In contrast, vframes represent source-level activations, so that one physical frame
+// can correspond to multiple source level frames because of inlining.
+// A frame is comprised of {pc, fp, sp}
+// ------------------------------ Asm interpreter ----------------------------------------
+// Layout of asm interpreter frame:
+//    [expression stack      ] * <- sp
+//    [monitors              ]   \
+//     ...                        | monitor block size
+//    [monitors              ]   /
+//    [monitor block size    ]
+//    [byte code index/pointer]                  = bcx()                bcx_offset
+//    [pointer to locals     ]                   = locals()             locals_offset
+//    [constant pool cache   ]                   = cache()              cache_offset
+//    [methodData            ]                   = mdp()                mdx_offset
+//    [methodOop             ]                   = method()             method_offset
+//    [last sp               ]                   = last_sp()            last_sp_offset
+//    [old stack pointer     ]                     (sender_sp)          sender_sp_offset
+//    [old frame pointer     ]   <- fp           = link()
+//    [return pc             ]
+//    [oop temp              ]                     (only for native calls)
+//    [locals and parameters ]
+//                               <- sender sp
+// ------------------------------ Asm interpreter ----------------------------------------
+
+// ------------------------------ C++ interpreter ----------------------------------------
+//
+// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run)
+//
+//                             <- SP (current esp/rsp)
+//    [local variables         ] BytecodeInterpreter::run local variables
+//    ...                        BytecodeInterpreter::run local variables
+//    [local variables         ] BytecodeInterpreter::run local variables
+//    [old frame pointer       ]   fp [ BytecodeInterpreter::run's ebp/rbp ]
+//    [return pc               ]  (return to frame manager)
+//    [interpreter_state*      ]  (arg to BytecodeInterpreter::run)   --------------
+//    [expression stack        ] <- last_Java_sp                           |
+//    [...                     ] * <- interpreter_state.stack              |
+//    [expression stack        ] * <- interpreter_state.stack_base         |
+//    [monitors                ]   \                                       |
+//     ...                          | monitor block size                   |
+//    [monitors                ]   / <- interpreter_state.monitor_base     |
+//    [struct interpretState   ] <-----------------------------------------|
+//    [return pc               ] (return to callee of frame manager) [1]
+//    [locals and parameters   ]
+//                               <- sender sp
+
+// [1] When the c++ interpreter calls a new method it returns to the frame
+//     manager which allocates a new frame on the stack. In that case there
+//     is no real callee of this newly allocated frame. The frame manager is
+//     aware of the  additional frame(s) and will pop them as nested calls
+//     complete. However, to make it look good in the debugger the frame
+//     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
+//     with a fake interpreter_state* parameter to make it easy to debug
+//     nested calls.
+
+// Note that contrary to the layout for the assembly interpreter the
+// expression stack allocated for the C++ interpreter is full sized.
+// However this is not as bad as it seems as the interpreter frame_manager
+// will truncate the unused space on successive method calls.
+//
+// ------------------------------ C++ interpreter ----------------------------------------
+
+// Layout of interpreter frame:
+// 
+//    [ monitor entry            ] <--- sp
+//      ...
+//    [ monitor entry            ]
+// -8 [ monitor block top        ] ( the top monitor entry )
+// -7 [ byte code pointer        ] (if native, bcp = 0)
+// -6 [ constant pool cache      ]
+// -5 [ methodData               ] mdx_offset (not core only)
+// -4 [ methodOop                ]
+// -3 [ locals offset            ]
+// -2 [ last sp                  ] (outgoing sp before a call)
+// -1 [ sender's sp              ]
+//  0 [ sender's fp              ] <--- fp
+//  1 [ return address           ]
+//  2 [ oop temp offset          ] (only for native calls)
+//  3 [ result handler offset    ] (only for native calls)
+//  4 [ result type info         ] (only for native calls)
+//    [ local var m-1            ]
+//      ...
+//    [ local var 0              ]
+//    [ argument word n-1        ] <--- ( sender's sp )
+//        ...
+//    [ argument word 0          ] <--- S7
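+// NOTE (illustrative sketch added by the editor, not part of the original port):
+// with the shared frame convention addr_at(index) == &fp()[index], each slot in the
+// layout above is reached as fp + offset * wordSize.  For example, assuming the
+// 32-bit configuration (wordSize == 4) and fp == 0x2ff0, the sender's sp slot
+// (offset -1) lives at 0x2ff0 - 4 = 0x2fec and is read as
+//   intptr_t* sender_sp = (intptr_t*) at(interpreter_frame_sender_sp_offset);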
+			 
+ public:
+  enum {
+    pc_return_offset                                 =  0,
+    // All frames
+    link_offset                                      =  0,
+    return_addr_offset                               =  1,
+    // non-interpreter frames
+    sender_sp_offset                                 =  2,
+
+#ifndef CC_INTERP
+
+    // Interpreter frames
+    interpreter_frame_return_addr_offset             =  1,
+    interpreter_frame_result_handler_offset          =  3, // for native calls only
+    interpreter_frame_oop_temp_offset                =  2, // for native calls only
+
+    interpreter_frame_sender_sp_offset               = -1,
+    interpreter_frame_sender_fp_offset               =  0,
+    // outgoing sp before a call to an invoked method
+    interpreter_frame_last_sp_offset                 = interpreter_frame_sender_sp_offset - 1,
+    interpreter_frame_locals_offset                  = interpreter_frame_last_sp_offset - 1,
+    interpreter_frame_method_offset                  = interpreter_frame_locals_offset - 1,
+    interpreter_frame_mdx_offset                     = interpreter_frame_method_offset - 1,
+    interpreter_frame_cache_offset                   = interpreter_frame_mdx_offset - 1,
+//    interpreter_frame_locals_offset                  = interpreter_frame_cache_offset - 1,
+    interpreter_frame_bcx_offset                     = interpreter_frame_cache_offset - 1,
+    interpreter_frame_initial_sp_offset              = interpreter_frame_bcx_offset - 1,
+
+    interpreter_frame_monitor_block_top_offset       = interpreter_frame_initial_sp_offset,
+    interpreter_frame_monitor_block_bottom_offset    = interpreter_frame_initial_sp_offset,
+
+#endif // CC_INTERP
+
+    // Entry frames
+#ifdef AMD64
+#ifdef _WIN64
+    entry_frame_after_call_words                     =  8,
+    entry_frame_call_wrapper_offset                  =  2,
+
+    arg_reg_save_area_bytes                          = 32, // Register argument save area
+#else
+    entry_frame_after_call_words                     = 13,
+    entry_frame_call_wrapper_offset                  = -6,
+
+    arg_reg_save_area_bytes                          =  0,
+#endif // _WIN64
+#else
+    entry_frame_call_wrapper_offset                  =  2,
+#endif // AMD64
+
+    // Native frames
+
+    native_frame_initial_param_offset                =  2
+
+  };
+
+  intptr_t ptr_at(int offset) const {
+    return *ptr_at_addr(offset);
+  }
+
+  void ptr_at_put(int offset, intptr_t value) {
+    *ptr_at_addr(offset) = value;
+  }
+
+ private:
+  // an additional field beyond _sp and _pc:
+  intptr_t*   _fp; // frame pointer
+  // The interpreter and adapters will extend the frame of the caller.
+  // Since oopMaps are based on the sp of the caller before extension
+  // we need to know that value. However in order to compute the address
+  // of the return address we need the real "raw" sp. Since sparc already
+  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
+  // original sp we use that convention.
+
+  intptr_t*     _unextended_sp;
+
+  intptr_t* ptr_at_addr(int offset) const {
+    return (intptr_t*) addr_at(offset);
+  }
+
+ public:
+  // Constructors
+
+  frame(intptr_t* sp, intptr_t* fp, address pc);
+
+  frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc);
+
+  frame(intptr_t* sp, intptr_t* fp);
+
+  // accessors for the instance variables
+  intptr_t*   fp() const { return _fp; }
+
+  inline address* sender_pc_addr() const;
+
+  // return address of param, zero origin index.
+  inline address* native_param_addr(int idx) const;
+
+  // expression stack tos if we are nested in a java call
+  intptr_t* interpreter_frame_last_sp() const;
+
+#ifndef CC_INTERP
+  // deoptimization support
+  void interpreter_frame_set_last_sp(intptr_t* sp);
+#endif // CC_INTERP
+
+#ifdef CC_INTERP
+  inline interpreterState get_interpreterState() const;
+#endif // CC_INTERP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,294 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Inline functions for Loongson frames:
+
+// Constructors:
+
+inline frame::frame() {
+  _pc = NULL;
+  _sp = NULL;
+  _unextended_sp = NULL;
+  _fp = NULL;
+  _cb = NULL;
+  _deopt_state = unknown;
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  _deopt_state = not_deoptimized;
+  if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) {
+    _pc = (((nmethod*)_cb)->get_original_pc(this));
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
+  _sp = sp;
+  _unextended_sp = unextended_sp;
+  _fp = fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  _deopt_state = not_deoptimized;
+  if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) {
+    _pc = (((nmethod*)_cb)->get_original_pc(this));
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* fp) {
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = (address)(sp[-1]);
+
+  // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+  // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+  // unlucky the junk value could point into a zombied method and we'll die on the
+  // find_blob call. This is also why we can have no asserts on the validity
+  // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+  // -> pd_last_frame should use a specialized version of pd_last_frame which could
+  // call a specialized frame constructor instead of this one.
+  // Then we could use the assert below. However, that assert is of somewhat dubious
+  // value.
+  // assert(_pc != NULL, "no pc?");
+
+  _cb = CodeCache::find_blob(_pc);
+
+  _deopt_state = not_deoptimized;
+  if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) {
+    _pc = (((nmethod*)_cb)->get_original_pc(this));
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+// Accessors
+
+inline bool frame::equal(frame other) const {
+  bool ret =  sp() == other.sp()
+              && unextended_sp() == other.unextended_sp()
+              && fp() == other.fp()
+              && pc() == other.pc();
+  assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction");
+  return ret;
+}
+
+// Return unique id for this frame. The id must have a value where we can distinguish
+// identity and younger/older relationship. NULL represents an invalid (incomparable)
+// frame.
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
+
+// Relationals on frames based
+// Return true if the frame is younger (more recent activation) than the frame represented by id
+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                    return this->id() < id ; }
+
+// Return true if the frame is older (less recent activation) than the frame represented by id
+inline bool frame::is_older(intptr_t* id) const   { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                    return this->id() > id ; }
+
+
+
+inline intptr_t* frame::link() const              { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }
+inline void      frame::set_link(intptr_t* addr)  { *(intptr_t **)addr_at(link_offset) = addr; }
+
+
+inline intptr_t* frame::unextended_sp() const     { return _unextended_sp; }
+
+// Return address:
+
+inline address* frame::sender_pc_addr()      const { return (address*) addr_at( return_addr_offset); }
+inline address  frame::sender_pc()           const { return *sender_pc_addr(); }
+
+// return address of param, zero origin index.
+inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); }
+
+#ifdef CC_INTERP
+
+inline interpreterState frame::get_interpreterState() const {
+  return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize ));
+}
+
+inline intptr_t*    frame::sender_sp()        const {
+  // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames?
+  if (is_interpreted_frame()) {
+    assert(false, "should never happen");
+    return get_interpreterState()->sender_sp();
+  } else {
+    return            addr_at(sender_sp_offset);
+  }
+}
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_locals);
+}
+
+inline intptr_t* frame::interpreter_frame_bcx_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return (intptr_t*) &(get_interpreterState()->_bcp);
+}
+
+
+// Constant pool cache
+
+inline constantPoolCacheOop* frame::interpreter_frame_cache_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_constants);
+}
+
+// Method
+
+inline methodOop* frame::interpreter_frame_method_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_method);
+}
+
+inline intptr_t* frame::interpreter_frame_mdx_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return (intptr_t*) &(get_interpreterState()->_mdx);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  assert(is_interpreted_frame(), "wrong frame type");
+  return get_interpreterState()->_stack + 1;
+}
+
+#else /* asm interpreter */
+inline intptr_t*    frame::sender_sp()        const { return            addr_at(   sender_sp_offset); }
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  return (intptr_t**)addr_at(interpreter_frame_locals_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_last_sp() const {
+  return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_bcx_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_bcx_offset);
+}
+
+
+inline intptr_t* frame::interpreter_frame_mdx_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_mdx_offset);
+}
+
+
+
+// Constant pool cache
+
+inline constantPoolCacheOop* frame::interpreter_frame_cache_addr() const {
+  return (constantPoolCacheOop*)addr_at(interpreter_frame_cache_offset);
+}
+
+// Method
+
+inline methodOop* frame::interpreter_frame_method_addr() const {
+  return (methodOop*)addr_at(interpreter_frame_method_offset);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  intptr_t* last_sp = interpreter_frame_last_sp();
+  if (last_sp == NULL ) {
+    return sp();
+  } else {
+    // sp() may have been extended by an adapter
+    assert(last_sp < fp() && last_sp >= sp(), "bad tos");
+    return last_sp;
+  }
+}
+
+#endif /* CC_INTERP */
+
+inline int frame::pd_oop_map_offset_adjustment() const {
+  return 0;
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+  return BasicObjectLock::size();
+}
+
+
+// expression stack
+// (the max_stack arguments are used by the GC; see class FrameClosure)
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end();
+  return monitor_end-1;
+}
+
+
+inline jint frame::interpreter_frame_expression_stack_direction() { return -1; }
+
+
+// Entry frames
+
+inline JavaCallWrapper* frame::entry_frame_call_wrapper() const {
+ return (JavaCallWrapper*)at(entry_frame_call_wrapper_offset);
+}
+
+
+// Compiled frames
+
+inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return (nof_args - local_index + (local_index < nof_args ? 1: -1));
+}
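+// NOTE (illustrative, not part of the original port): with nof_args == 2 the
+// formula above yields 2 - 0 + 1 == 3 for local_index 0 (an argument) and
+// 2 - 3 - 1 == -2 for local_index 3 (a non-argument local).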
+
+inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors);
+}
+
+inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1);
+}
+
+inline bool frame::volatile_across_calls(Register reg) {
+  return true;
+}
+
+
+
+inline oop frame::saved_oop_result(RegisterMap* map) const       {
+  return *((oop*) map->location(V0->as_VMReg()));
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  *((oop*) map->location(V0->as_VMReg())) = obj;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,29 @@
+/*
+ * Copyright 1999-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Size of MIPS Instructions
+const int BytesPerInstWord = 4;
+
+const int StackAlignmentInBytes = (2*wordSize);
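+// NOTE (illustrative, not part of the original port): in the 32-bit configuration,
+// where wordSize == 4, this evaluates to 8, i.e. the stack is kept 8-byte aligned,
+// matching the stack alignment conventionally used by the o32 ABI for 64-bit
+// (long long / double) values.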
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/globals_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+//
+#ifdef CORE
+define_pd_global(bool,  UseSSE,      0);
+#endif /* CORE */
+define_pd_global(bool,  ConvertSleepToYield,      true);
+define_pd_global(bool,  ShareVtableStubs,         true);
+define_pd_global(bool,  CountInterpCalls,         true);
+
+define_pd_global(bool, ImplicitNullChecks,          true);  // Generate code for implicit null checks
+define_pd_global(bool, UncommonNullCast,            true);  // Uncommon-trap NULLs past to check cast
+define_pd_global(bool, NeedsDeoptSuspend,           false); // only register window machines need this
+
+// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
+// assign a different value for C2 without touching a number of files. Use 
+// #ifdef to minimize the change as it's late in Mantis. -- FIXME.
+// c1 doesn't have this problem because the fix to 4858033 assures us
+// that the vep is aligned at CodeEntryAlignment whereas c2 only aligns
+// the uep, and the vep doesn't get real alignment but just slops on by,
+// only assured that the entry instruction meets the 5 byte size requirement.
+define_pd_global(intx,  CodeEntryAlignment,       32); 
+
+define_pd_global(uintx, TLABSize,                 0); 
+define_pd_global(uintx, NewSize,                  1024 * K);
+define_pd_global(intx,  InlineFrequencyCount,     100);
+define_pd_global(intx,  PreInflateSpin,           10);
+
+define_pd_global(intx, PrefetchCopyIntervalInBytes, -1);
+define_pd_global(intx, PrefetchScanIntervalInBytes, -1);
+define_pd_global(intx, PrefetchFieldsAhead,         -1);
+
+define_pd_global(intx, StackYellowPages, 2);
+define_pd_global(intx, StackRedPages, 1);
+define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1));
+
+define_pd_global(bool, RewriteBytecodes,     true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+// Only c2 cares about this at the moment
+define_pd_global(intx, AllocatePrefetchStyle,        2);
+define_pd_global(intx, AllocatePrefetchDistance,     -1);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,83 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_icBuffer_mips.cpp.incl"
+
+int InlineCacheBuffer::ic_stub_code_size() {
+  return  28;
+  //return NativeMovConstReg::instruction_size +
+  //       NativeJump::instruction_size +
+  //       1;
+  // so that code_end can be set in CodeBuffer
+  // 64bit 16 = 5 + 10 bytes + 1 byte
+  // 32bit 11 = 10 bytes + 1 byte
+}
+
+
+// We use T1 for the cached oop (klass) now. This is the target of the virtual call;
+// when we reach here, the receiver is in T0.
+// Refer to sharedRuntime_mips.cpp, gen_i2c2i_adapters.
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, oop cached_oop, address entry_point) {
+  ResourceMark rm;
+  //CodeBuffer      code(code_begin, ic_stub_code_size());
+  CodeBuffer    code (code_begin, ic_stub_code_size()); 
+  MacroAssembler* masm            = new MacroAssembler(&code);
+  // note: even though the code contains an embedded oop, we do not need reloc info
+  // because
+  // (1) the oop is old (i.e., doesn't matter for scavenges)
+  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
+  assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop");
+  //masm->lea(rax, OopAddress((address) cached_oop));
+  //masm->jump(ExternalAddress(entry_point));
+#define __ masm->
+	__ lui(T1, Assembler::split_high((int)cached_oop));
+	__ addiu(T1, T1, Assembler::split_low((int)cached_oop));
+
+	__ lui(T9, Assembler::split_high((int)entry_point));
+	__ addiu(T9, T9, Assembler::split_low((int)entry_point));
+	__ jr(T9);
+	__ delayed()->nop();
+	__ flush();
+#undef __ 
+}
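+// NOTE (illustrative, not part of the original port): the stub emitted above is
+// lui/addiu (cached_oop into T1), lui/addiu (entry_point into T9), jr T9 and the
+// delay-slot nop: 6 instructions, i.e. 6 * BytesPerInstWord == 24 bytes, so the
+// 28 bytes reserved by ic_stub_code_size() leave one spare instruction word.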
+
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // creation also verifies the object
+  //NativeJump*        jump = nativeJump_at(move->next_instruction_address());
+  NativeGeneralJump*        jump = nativeGeneralJump_at(move->next_instruction_address());
+  return jump->jump_destination();
+}
+
+
+oop InlineCacheBuffer::ic_buffer_cached_oop(address code_begin) {
+  // creation also verifies the object
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);
+  // Verifies the jump
+  //NativeJump*        jump = nativeJump_at(move->next_instruction_address());
+  NativeGeneralJump*        jump = nativeGeneralJump_at(move->next_instruction_address());
+  return (oop)move->data();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/icache_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,58 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_icache_mips.cpp.incl"
+#include <asm/cachectl.h>
+#include <sys/cachectl.h>
+#include <sys/sysmips.h>
+
+// No flush stub is needed: we just invoke the cacheflush system call to flush the cache.
+// Update @jerome, 12/05/2006:
+// Cache flushing is a very frequent operation, and flushing the whole cache degrades
+// performance sharply, so this was modified to request flushing of just the needed range.
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {};
+
+void ICache::call_flush_stub(address start, int lines) {
+	// In fact, the current OS implementation simply flushes all of the ICACHE & DCACHE.
+	cacheflush(start, lines * line_size , ICACHE);
+//	sysmips(3, 0, 0, 0);
+}
+
+void ICache::invalidate_word(address addr) {
+	//cacheflush(addr, 4, ICACHE);
+
+	cacheflush(addr,4, ICACHE);
+//	sysmips(3, 0, 0, 0);
+}
+
+void ICache::invalidate_range(address start, int nbytes) {
+	cacheflush(start, nbytes, ICACHE);
+//	sysmips(3, 0, 0, 0);
+}
+
+void ICache::invalidate_all() {
+	sysmips(3, 0, 0, 0);
+}
+
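+// NOTE (illustrative sketch, not part of the original port; names are hypothetical):
+// a typical caller patches an instruction word and then invalidates the touched
+// range, e.g.
+//
+//   *(juint*) patch_addr = new_inst;                        // patch the code
+//   ICache::invalidate_range(patch_addr, BytesPerInstWord); // 4 bytes on MIPS
+//
+// which, with this implementation, ends up as cacheflush(patch_addr, 4, ICACHE).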
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/icache_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,61 @@
+/*
+ * Copyright 1997-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Interface for updating the instruction cache.  Whenever the VM modifies
+// code, part of the processor instruction cache potentially has to be flushed.
+
+// On the x86, this is a no-op -- the I-cache is guaranteed to be consistent
+// after the next jump, and the VM never modifies instructions directly ahead
+// of the instruction fetch path.
+
+// [phh] It's not clear that the above comment is correct, because on an MP
+// system where the dcaches are not snooped, only the thread doing the invalidate
+// will see the update.  Even in the snooped case, a memory fence would be
+// necessary if stores weren't ordered.  Fortunately, they are on all known
+// x86 implementations.
+
+class ICache : public AbstractICache {
+ public:
+  enum {
+    stub_size      = 0,                 // Size of the icache flush stub in bytes
+    //FIXME aoqi
+    //line_size      = BytesPerWord,      // conservative
+    //log2_line_size = LogBytesPerWord    // log2(line_size)
+    line_size      = 32,                  // assumed cache line size in bytes
+    log2_line_size = 5                    // log2(line_size)
+  };
+
+  // nothing to do
+  static void initialize() {}
+
+  static void call_flush_stub(address start, int lines);
+
+  static void invalidate_word(address addr);
+
+  static void invalidate_range(address start, int nbytes);
+
+  static void invalidate_all();
+
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/interp_masm_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,2284 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_interp_masm_mips.cpp.incl"
+
+
+// Implementation of InterpreterMacroAssembler
+
+#ifdef CC_INTERP
+void InterpreterMacroAssembler::get_method(Register reg) {
+}
+#endif // CC_INTERP
+
+#ifndef CC_INTERP
+
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
+                                                  int number_of_arguments) {
+  // interpreter specific
+  //
+  // Note: No need to save/restore bcp & locals (r13 & r14) pointer
+  //       since these are callee saved registers and no blocking/
+  //       GC can happen in leaf calls.
+  // Further Note: DO NOT save/restore bcp/locals. If a caller has
+  // already saved them so that it can use esi/edi as temporaries
+  // then a save/restore here will DESTROY the copy the caller
+  // saved! There used to be a save_bcp() that only happened in
+  // the ASSERT path (no restore_bcp). Which caused bizarre failures
+  // when jvm built with ASSERTs.
+/*
+#ifdef ASSERT
+  {
+    Label L;
+    cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+    jcc(Assembler::equal, L);
+    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
+         " last_sp != NULL");
+    bind(L);
+  }
+#endif
+  // super call
+  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+  // interpreter specific
+  // Used to ASSERT that r13/r14 were equal to frame's bcp/locals
+  // but since they may not have been saved (and we don't want to
+  // save thme here (see note above) the assert is invalid.
+*/
+#ifdef ASSERT
+	save_bcp();
+	{ Label L;
+		//cmpl(Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize), 
+		//NULL_WORD);
+		lw(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); 
+		// jcc(Assembler::equal, L);
+		beq(AT,ZERO,L);  
+		delayed()->nop(); 
+		stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL");
+		bind(L);
+	}
+#endif
+	// super call
+	MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+	// interpreter specific
+#ifdef ASSERT
+	{ Label L;
+		lw(T3, FP, frame::interpreter_frame_bcx_offset * wordSize);
+		Assembler::beq(BCP, T3, L);
+		delayed()->nop();
+		stop("InterpreterMacroAssembler::call_VM_leaf_base: esi not callee saved?");
+		bind(L);
+	}
+	{ Label L;
+		lw(T3, FP, frame::interpreter_frame_locals_offset * wordSize);
+		Assembler::beq(LVP, T3, L);
+		delayed()->nop();
+		stop("InterpreterMacroAssembler::call_VM_leaf_base: edi not callee saved?");
+		bind(L);
+	}
+#endif
+}
+
+void InterpreterMacroAssembler::call_VM_base(Register oop_result,
+                                             Register java_thread,
+                                             Register last_java_sp,
+                                             address  entry_point,
+                                             int      number_of_arguments,
+                                             bool     check_exceptions) {
+#if 0
+	// interpreter specific
+  //
+  // Note: Could avoid restoring locals ptr (callee saved) - however doesn't
+  //       really make a difference for these runtime calls, since they are
+  //       slow anyway. Btw., bcp must be saved/restored since it may change
+  //       due to GC.
+  // assert(java_thread == noreg , "not expecting a precomputed java thread");
+  save_bcp();
+#ifdef ASSERT
+  {
+    Label L;
+    cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+    jcc(Assembler::equal, L);
+    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
+         " last_sp != NULL");
+    bind(L);
+  }
+#endif /* ASSERT */
+  // super call
+  MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp,
+                               entry_point, number_of_arguments,
+                               check_exceptions);
+  // interpreter specific
+  restore_bcp();
+  restore_locals();
+#endif
+#ifdef ASSERT
+	{ Label L;
+		//  cmpl(Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize),
+		//  NULL_WORD);
+		// jcc(Assembler::equal, L);
+		lw(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); 
+		beq(AT,ZERO, L); 
+		delayed()->nop(); 
+		stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL");
+		bind(L);
+	}
+#endif /* ASSERT */
+	// interpreter specific
+	//
+	// Note: Could avoid restoring locals ptr (callee saved) - however doesn't
+	//       really make a difference for these runtime calls, since they are
+	//       slow anyway. Btw., bcp must be saved/restored since it may change
+	//       due to GC.
+	assert(java_thread == noreg , "not expecting a precomputed java thread");
+	save_bcp();
+	// super call
+	MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+	restore_bcp();
+	restore_locals();
+}
+
+
+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
+  if (JvmtiExport::can_pop_frame()) {
+    Label L;
+    // Initiate popframe handling only if it is not already being
+    // processed.  If the flag has the popframe_processing bit set, it
+    // means that this code is called *during* popframe handling - we
+    // don't want to reenter.
+    // This method is only called just after the call into the vm in
+    // call_VM_base, so the arg registers are available.
+    /*
+		movl(c_rarg0, Address(r15_thread, JavaThread::popframe_condition_offset()));
+    testl(c_rarg0, JavaThread::popframe_pending_bit);
+    jcc(Assembler::zero, L);
+    testl(c_rarg0, JavaThread::popframe_processing_bit);
+    jcc(Assembler::notZero, L);
+    // Call Interpreter::remove_activation_preserving_args_entry() to get the
+    // address of the same-named entrypoint in the generated interpreter code.
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+    jmp(rax);
+    bind(L);
+		*/
+		Register pop_cond = java_thread;
+		// Not clear if any other register is available...
+		lw(pop_cond, java_thread, in_bytes(JavaThread::popframe_condition_offset()));
+		andi(AT, pop_cond, JavaThread::popframe_pending_bit);
+		beq(AT, ZERO, L);		
+		delayed()->andi(AT, pop_cond, JavaThread::popframe_processing_bit);		
+		bne(AT, ZERO, L);
+		delayed()->nop();
+		call( CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry), relocInfo::runtime_call_type);
+		delayed()->nop();
+		jr(V0);
+		delayed()->nop();
+		bind(L);
+#ifndef OPT_THREAD
+		get_thread(java_thread);
+#endif
+  }
+}
+
+
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
+#if 0
+	movptr(rcx, Address(r15_thread, JavaThread::jvmti_thread_state_offset()));
+  const Address tos_addr(rcx, JvmtiThreadState::earlyret_tos_offset());
+  const Address oop_addr(rcx, JvmtiThreadState::earlyret_oop_offset());
+  const Address val_addr(rcx, JvmtiThreadState::earlyret_value_offset());
+  switch (state) {
+    case atos: movptr(rax, oop_addr);
+               movptr(oop_addr, (int32_t)NULL_WORD);
+               verify_oop(rax, state);              break;
+    case ltos: movptr(rax, val_addr);                 break;
+    case btos:                                   // fall through
+    case ctos:                                   // fall through
+    case stos:                                   // fall through
+    case itos: movl(rax, val_addr);                 break;
+    case ftos: movflt(xmm0, val_addr);              break;
+    case dtos: movdbl(xmm0, val_addr);              break;
+    case vtos: /* nothing to do */                  break;
+    default  : ShouldNotReachHere();
+  }
+  // Clean up tos value in the thread object
+  movl(tos_addr,  (int) ilgl);
+  movl(val_addr,  (int32_t) NULL_WORD);
+#endif
+	//T5, thread
+	// get_thread(ecx);
+	get_thread(T5);
+	// movl(ecx, Address(ecx, JavaThread::jvmti_thread_state_offset()));
+	lw(T5,T5,in_bytes(JavaThread::jvmti_thread_state_offset())); 
+	/* 
+	   const Address tos_addr (ecx, JvmtiThreadState::earlyret_tos_offset());
+	   const Address oop_addr (ecx, JvmtiThreadState::earlyret_oop_offset());
+	   const Address val_addr (ecx, JvmtiThreadState::earlyret_value_offset());
+	   const Address val_addr1(ecx, JvmtiThreadState::earlyret_value_offset()
+	   + in_ByteSize(wordSize));
+	   */ 
+	const Address tos_addr (T5, in_bytes(JvmtiThreadState::earlyret_tos_offset()));
+	const Address oop_addr (T5, in_bytes(JvmtiThreadState::earlyret_oop_offset()));
+	const Address val_addr (T5, in_bytes(JvmtiThreadState::earlyret_value_offset()));
+	const Address val_addr1(T5, in_bytes(JvmtiThreadState::earlyret_value_offset()
+				+ in_ByteSize(wordSize)));
+	//V0, oop_addr,V1,val_addr 
+	switch (state) {
+		case atos: 
+			//movl(eax, oop_addr);
+			lw(V0, oop_addr);
+			// movl(oop_addr, NULL_WORD);
+			sw(ZERO, oop_addr);  
+			//verify_oop(eax, state);       break;
+			verify_oop(V0, state);               
+			break;
+		case ltos: 
+			// movl(edx, val_addr1);               // fall through
+			lw(V1, val_addr1);               // fall through
+		case btos:                                     // fall through
+		case ctos:                                     // fall through
+		case stos:                                     // fall through
+		case itos: 
+			//	movl(eax, val_addr);               
+			lw(V0, val_addr);               
+			break;
+			//FIXME: I have no idea where fld stores to. @jerome
+		case ftos: 
+			//fld_s(val_addr);                       
+			lwc1(F0,T5, in_bytes(JvmtiThreadState::earlyret_value_offset()));	
+			break;
+		case dtos: 
+			//fld_d(val_addr);                       
+			lwc1(F0,T5, in_bytes(JvmtiThreadState::earlyret_value_offset()));	
+			lwc1(F1,T5, in_bytes(JvmtiThreadState::earlyret_value_offset())+4);	
+			break;
+		case vtos: /* nothing to do */                    break;
+		default  : ShouldNotReachHere();
+	}
+	// Clean up tos value in the thread object
+	// movl(tos_addr,  (int) ilgl);
+	//addi(AT,ZERO,(int)ilgl); 
+	move(AT, (int)ilgl); 
+	// movl(val_addr,  NULL_WORD);
+	sw(ZERO,T5, in_bytes(JvmtiThreadState::earlyret_value_offset())); 
+	//  movl(val_addr1, NULL_WORD);
+	sw(ZERO, T5, in_bytes(JvmtiThreadState::earlyret_value_offset() + in_ByteSize(wordSize)));
+}
+
+
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) {
+  if (JvmtiExport::can_force_early_return()) {
+    Label L;
+    //movptr(c_rarg0, Address(r15_thread, JavaThread::jvmti_thread_state_offset()));
+		Register tmp = java_thread;
+		lw(tmp,tmp, in_bytes(JavaThread::jvmti_thread_state_offset())); 
+    //testptr(c_rarg0, c_rarg0);
+    //jcc(Assembler::zero, L); // if (thread->jvmti_thread_state() == NULL) exit;
+		beq(tmp,ZERO,L);
+		delayed()->nop(); 
+
+    // Initiate earlyret handling only if it is not already being processed.
+    // If the flag has the earlyret_processing bit set, it means that this code
+    // is called *during* earlyret handling - we don't want to reenter.
+    //movl(c_rarg0, Address(c_rarg0, JvmtiThreadState::earlyret_state_offset()));
+		lw(tmp, tmp, in_bytes(JvmtiThreadState::earlyret_state_offset()));
+    //cmpl(c_rarg0, JvmtiThreadState::earlyret_pending);
+    //jcc(Assembler::notEqual, L);
+		move(AT, JvmtiThreadState::earlyret_pending); 
+		beq(tmp,AT,L); 
+		delayed()->nop(); 
+		get_thread(java_thread);
+
+    // Call Interpreter::remove_activation_early_entry() to get the address of the
+    // same-named entrypoint in the generated interpreter code.
+    //movptr(c_rarg0, Address(r15_thread, JavaThread::jvmti_thread_state_offset()));
+		lw(tmp,java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); 
+    //movl(c_rarg0, Address(c_rarg0, JvmtiThreadState::earlyret_tos_offset()));
+		lw(AT,tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset()));
+		push(AT); 
+		call(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry),  
+				relocInfo::runtime_call_type);
+    //call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), c_rarg0);
+    //jmp(rax);
+    //bind(L);
+		jr(V0); 
+		delayed()->nop(); 
+		bind(L);
+		get_thread(java_thread);
+  }
+}
+
+
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(
+							  Register reg,
+							  int bcp_offset) {
+	assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
+	load_two_bytes_from_at_bcp(reg, AT, bcp_offset);
+	hswap(reg);
+}
+
+
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
+                                                           Register index,
+                                                           int bcp_offset) {
+  	assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  	assert(cache != index, "must use different registers");
+	load_two_bytes_from_at_bcp(index, AT, bcp_offset);
+	lw(cache, FP, frame::interpreter_frame_cache_offset * wordSize);
+	assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+}
+
+
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
+								Register tmp,
+								int bcp_offset) {
+	assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+	assert(cache != tmp, "must use different register");
+
+	load_two_bytes_from_at_bcp(AT, tmp, bcp_offset);
+	assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+	// convert from field index to ConstantPoolCacheEntry index
+	// and from word offset to byte offset
+	sll(AT, AT, 4);
+	lw(cache, FP, frame::interpreter_frame_cache_offset * wordSize);
+	// skip past the header
+	addi(cache, cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
+	add(cache, cache, AT);
+}
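The sll by 4 leans on the assert above (sizeof(ConstantPoolCacheEntry) == 4 * wordSize, i.e. 16 bytes with a 4-byte word); the three instructions compute, roughly:

    // cache entry = cpCache base + header + index * sizeof(ConstantPoolCacheEntry)
    address entry = (address) cp_cache                                   // lw(cache, FP, ...)
                  + in_bytes(constantPoolCacheOopDesc::base_offset())    // addi(cache, cache, ...)
                  + (index << 4);                                        // sll(AT, AT, 4); add(cache, cache, AT)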
+
+
+// Generate a subtype check: branch to ok_is_subtype if Rsub_klass is a
+// subtype of Rsup_klass, fall through otherwise.  Clobbers AT, T5, T6 and T8.
+void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) {
+	//  assert( Rsub_klass != eax, "eax holds superklass" );
+	//  assert( Rsub_klass != ecx, "ecx holds 2ndary super array length" );
+	//  assert( Rsub_klass != edi, "edi holds 2ndary super array scan ptr" );
+	Label not_subtype, loop;
+
+	// Load the super-klass's check offset into T5
+	lw( T5, Rsup_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes());
+	// Load from the sub-klass's super-class display list, or a 1-word cache of
+	// the secondary superclass list, or a failing value with a sentinel offset
+	// if the super-klass is an interface or exceptionally deep in the Java
+	// hierarchy and we have to scan the secondary superclass list the hard way.
+	// See if we get an immediate positive hit
+	//add(T6, Rsub_klass, T5);
+/*	
+	move(AT, (int)&jerome1 );
+	 sw(T2, AT, 0); 	
+	 move(AT, (int)&jerome2 );
+	 sw(ZERO, AT, 0); 	
+	 move(AT, (int)&jerome3 );
+	 sw(ZERO, AT, 0); 	
+	 move(AT, (int)&jerome4 );
+	 sw(ZERO, AT, 0); 	
+
+	 move(AT, (int)&jerome5 );
+	 sw(ZERO, AT, 0); 	
+
+	 move(AT, (int)&jerome6 );
+	 sw(ZERO, AT, 0); 	
+
+	 move(AT, (int)&jerome7 );
+	 sw(ZERO, AT, 0); 	
+
+	 move(AT, (int)&jerome8 );
+	 sw(ZERO, AT, 0); 	
+	
+	 move(AT, (int)&jerome9 );
+	 sw(ZERO, AT, 0);
+	 move(AT, (int)&jerome10 );
+	 sw(ZERO, AT, 0);
+
+
+	 pushad();
+//	__ enter();
+	 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				relocInfo::runtime_call_type);
+	 delayed()->nop();
+//	__ leave();
+	popad();
+
+	 move(AT, (int)&jerome1 );
+	 sw(T2, AT, 0); 	
+*/
+	
+	add(AT, Rsub_klass, T5);
+	lw(T6, AT, 0);
+/*
+	 move(T2, (int)&jerome2 );
+	 sw(AT, T2, 0); 	
+	 move(AT, (int)&jerome3 );
+	 sw(ZERO, AT, 0); 	
+	 move(AT, (int)&jerome4 );
+	 sw(ZERO, AT, 0); 	
+
+
+	move(AT, (int)&jerome5 );
+	 sw(ZERO, AT, 0); 	
+
+	 move(AT, (int)&jerome6 );
+	 sw(ZERO, AT, 0); 	
+
+	 move(AT, (int)&jerome7 );
+	 sw(ZERO, AT, 0); 	
+
+	 move(AT, (int)&jerome8 );
+	 sw(ZERO, AT, 0); 	
+	
+	 move(AT, (int)&jerome9 );
+	 sw(ZERO, AT, 0);
+	 move(AT, (int)&jerome10 );
+	 sw(ZERO, AT, 0);
+	 move(AT, (int)&jerome1 );
+	 lw(T2, AT, 0); 	
+
+
+
+	 pushad();
+//	__ enter();
+	 call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				relocInfo::runtime_call_type);
+	 delayed()->nop();
+//	__ leave();
+	popad();
+
+*/
+	
+	Assembler::beq(T6, Rsup_klass, ok_is_subtype);
+	//	nop();
+
+	// Check for immediate negative hit
+	delayed()->nop();
+	move( T6, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+	bne(T6, T5, not_subtype);
+	delayed()->nop();
+	// Check for self
+	beq(Rsub_klass, Rsup_klass, ok_is_subtype);
+	delayed()->nop();
+
+	// Now do a linear scan of the secondary super-klass chain.
+	lw(T6, Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes());
+	// T6 (edi) holds the objArrayOop of secondary supers.
+	lw(T5, T6, arrayOopDesc::length_offset_in_bytes());// Load the array length
+	// Skip to start of data; also clear Z flag in case ECX is zero
+	addi(T6, T6, arrayOopDesc::base_offset_in_bytes(T_OBJECT) );
+	// Scan T5 words at [T6]([EDI]) for occurrence of Rsup_klass
+	// Set NZ/Z based on last compare
+	///  repne_scan();
+
+	Label Loop, subtype;
+	bind(Loop);
+	beq(T5, ZERO, not_subtype);
+	delayed()->nop();
+	lw(T8, T6, 0);
+	beq(T8, Rsup_klass, subtype);
+	delayed()->addi(T6, T6, 1 * wordSize);
+	b(Loop);
+	delayed()->addi(T5, T5, -1);
+
+
+	//  restore_locals();           // Restore EDI; Must not blow flags
+	// Not equal?
+	// Must be equal but missed in cache.  Update cache.
+	bind(subtype);
+	sw( Rsup_klass, Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
+	b( ok_is_subtype );
+	delayed()->nop();
+
+	bind(not_subtype);
+}
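As a reading aid, the run-time check the emitted code performs corresponds roughly to this C++ sketch; word_at(), the cache-offset constant and the accessor spellings are illustrative stand-ins, not the exact HotSpot API.

    bool is_subtype(klassOop sub, klassOop super) {
      int off = super->super_check_offset();                 // loaded into T5
      if (word_at(sub, off) == super) return true;           // immediate positive hit
      if (off != secondary_super_cache_offset) return false; // immediate negative hit
      if (sub == super) return true;                         // self check
      objArrayOop supers = sub->secondary_supers();          // linear scan: T5 = length, T6 = cursor
      for (int i = 0; i < supers->length(); i++) {
        if (supers->obj_at(i) == super) {
          sub->set_secondary_super_cache(super);             // update the one-word cache
          return true;
        }
      }
      return false;
    }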
+
+
+
+// Java Expression Stack
+
+#ifdef ASSERT
+// Verifies that the stack tag matches.  Must be called before the stack
+// value is popped off the stack.
+void InterpreterMacroAssembler::verify_stack_tag(frame::Tag t) {
+  if (TaggedStackInterpreter) {
+/*    
+		frame::Tag tag = t;
+    if (t == frame::TagCategory2) {
+      tag = frame::TagValue;
+      Label hokay;
+      cmpptr(Address(rsp, 3*wordSize), (int32_t)tag);
+      jcc(Assembler::equal, hokay);
+      stop("Java Expression stack tag high value is bad");
+      bind(hokay);
+    }
+    Label okay;
+    cmpptr(Address(rsp, wordSize), (int32_t)tag);
+    jcc(Assembler::equal, okay);
+    // Also compare if the stack value is zero, then the tag might
+    // not have been set coming from deopt.
+    cmpptr(Address(rsp, 0), 0);
+    jcc(Assembler::equal, okay);
+*/
+		Label okay;
+		lw(AT,SP, wordSize);  
+		addi(AT,AT,-(int)t); 
+		beq(AT,ZERO,okay); delayed()->nop(); 
+		lw(AT,SP,0); 
+		beq(AT,ZERO,okay); 
+		delayed()->nop(); 
+		stop("Java Expression stack tag value is bad");
+    bind(okay);
+  }
+}
+#endif // ASSERT
+
+void InterpreterMacroAssembler::pop_ptr(Register r) {
+  debug_only(verify_stack_tag(frame::TagReference));
+  pop(r);
+  //if (TaggedStackInterpreter) addptr(rsp, 1 * wordSize);
+	if (TaggedStackInterpreter) addi(SP,SP, 1 * wordSize);
+}
+
+void InterpreterMacroAssembler::pop_ptr(Register r, Register tag) {
+  pop(r);
+  if (TaggedStackInterpreter) pop(tag);
+}
+
+void InterpreterMacroAssembler::pop_i(Register r) {
+  // XXX can't use pop currently, upper half non clean
+  debug_only(verify_stack_tag(frame::TagValue));
+  //movl(r, Address(rsp, 0));
+  //addptr(rsp, wordSize);
+  pop(r);
+	//if (TaggedStackInterpreter) addptr(rsp, 1 * wordSize);
+	if (TaggedStackInterpreter) addi(SP,SP, 1 * wordSize);
+}
+/*
+void InterpreterMacroAssembler::pop_l(Register r) {
+  debug_only(verify_stack_tag(frame::TagCategory2));
+  //movq(r, Address(rsp, 0));
+  //addptr(rsp, 2 * Interpreter::stackElementSize());
+	//FIXME, this directly call assembler. by aoqi 
+	ld(r, SP, 0);
+	addi(SP, SP, 8);
+	if (TaggedStackInterpreter) addi(SP,SP, 2 * wordSize);
+}
+*/
+//FIXME How many registers do push_l & pop_l use? aoqi
+void InterpreterMacroAssembler::pop_l(Register lo, Register hi) {
+  debug_only(verify_stack_tag(frame::TagValue));
+  //popl(lo);
+	pop(lo); 
+	//if (TaggedStackInterpreter) addl(esp, 1 * wordSize);
+	if (TaggedStackInterpreter) addi(SP,SP, 1 * wordSize);       
+	debug_only(verify_stack_tag(frame::TagValue));               
+	//popl(hi);
+	pop(hi); 
+	//if (TaggedStackInterpreter) addl(esp, 1 * wordSize);       
+	if (TaggedStackInterpreter) addi(SP,SP, 1 * wordSize);
+}
+
+//void InterpreterMacroAssembler::pop_f(XMMRegister r) {
+void InterpreterMacroAssembler::pop_f() {
+  debug_only(verify_stack_tag(frame::TagValue));
+  //movflt(r, Address(rsp, 0));
+  //addptr(rsp, wordSize);
+  //if (TaggedStackInterpreter) addptr(rsp, 1 * wordSize);
+	lwc1(F0,SP,0); 
+	addi(SP,SP, 1 * wordSize);
+	if (TaggedStackInterpreter) addi(SP,SP, 1 * wordSize);
+}
+
+//void InterpreterMacroAssembler::pop_d(XMMRegister r) {
+void InterpreterMacroAssembler::pop_d() {
+  debug_only(verify_stack_tag(frame::TagCategory2));
+  //movdbl(r, Address(rsp, 0));
+  //addptr(rsp, 2 * Interpreter::stackElementSize());
+	pop_dtos_to_esp();
+	lwc1(F0,SP,0); 
+	lwc1(F1,SP,4); 
+	addi(SP,SP, 2 * wordSize);
+}
+
+// Pop the top of the Java expression stack to the execution stack (which
+// happens to be the same place).
+//FIXME, I have no idea which register to use
+void InterpreterMacroAssembler::pop_dtos_to_esp() {
+	if (TaggedStackInterpreter) {
+		// Pop double value into scratch registers
+		debug_only(verify_stack_tag(frame::TagValue));
+		//  popl(eax);
+		pop(V0); 
+		//addl(esp, 1* wordSize);
+		addi(SP,SP, 1* wordSize);
+		debug_only(verify_stack_tag(frame::TagValue));
+		//popl(edx);
+		pop(V1);
+		//addl(esp, 1* wordSize);
+		addi(SP,SP, 1* wordSize);
+		// pushl(edx);
+		push(V1);
+		//pushl(eax);
+		push(V0);
+	}
+}
+
+void InterpreterMacroAssembler::pop_ftos_to_esp() {
+  if (TaggedStackInterpreter) {
+    debug_only(verify_stack_tag(frame::TagValue));
+		//  popl(eax);
+		pop(V0);
+		//addl(esp, 1 * wordSize);
+		addi(SP,SP, 1 * wordSize);
+		// pushl(eax);  // ftos is at esp
+		push(V0);  // ftos is at esp
+	}
+}
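A note on the layout these pop helpers assume: with TaggedStackInterpreter on, every expression-stack value carries a companion tag word one slot above it, so each pop skips an extra wordSize, and pop_dtos_to_esp()/pop_ftos_to_esp() re-push only the values so the float/double ends up contiguous at SP for the lwc1/swc1 accesses. A minimal sketch (the struct is illustrative only):

    // One tagged expression-stack slot as seen from SP (illustrative):
    struct TaggedSlot { intptr_t value; intptr_t tag; };   // value at SP+0, tag at SP+wordSize
    // pop_ptr/pop_i  : read the value, then skip value + tag (pop() plus addi(SP, SP, wordSize))
    // pop_dtos_to_esp: pop both halves with their tags, push the bare halves back,
    //                  leaving the double at SP+0 / SP+wordSize for the lwc1 loads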
+
+void InterpreterMacroAssembler::push_ptr(Register r) {
+  //if (TaggedStackInterpreter) push(frame::TagReference);
+	if (TaggedStackInterpreter) {
+		move(AT, frame::TagReference); 
+		push(AT);
+	}//pushl(r);
+  push(r);
+}
+
+void InterpreterMacroAssembler::push_ptr(Register r, Register tag) {
+  //if (TaggedStackInterpreter) push(tag);
+	if (TaggedStackInterpreter){
+		move(AT, tag);
+		push(AT);  // tag first
+	} 
+  push(r);
+}
+
+void InterpreterMacroAssembler::push_i(Register r) {
+  //if (TaggedStackInterpreter) push(frame::TagValue);
+	if (TaggedStackInterpreter) {
+		move(AT, frame::TagValue);	
+		push(AT);
+	}
+  push(r);
+}
+/*
+void InterpreterMacroAssembler::push_l(Register r) {
+  if (TaggedStackInterpreter) {
+    //push(frame::TagValue);
+    //subptr(rsp, 1 * wordSize);
+    //push(frame::TagValue);
+    //subptr(rsp, 1 * wordSize);
+		move(AT, frame::TagValue);
+		push(AT);
+  } else {
+    addi(SP, SP, (-2) * wordSize);
+  }
+  //movq(Address(rsp, 0), r);
+	//FIXME, same as pop_l
+	sd(r, SP, 0);
+}
+*/
+//FIXME How many registers do push_l & pop_l use? aoqi
+void InterpreterMacroAssembler::push_l(Register lo, Register hi) {
+  //if (TaggedStackInterpreter) pushl(frame::TagValue);
+  if (TaggedStackInterpreter) {
+    move(AT, frame::TagValue);
+		push(AT);
+	}
+	//pushl(hi);
+	push(hi);
+	//if (TaggedStackInterpreter) pushl(frame::TagValue);
+	if (TaggedStackInterpreter) {
+		move(AT, frame::TagValue);
+		push(AT);
+	}
+	//pushl(lo);
+	push(lo);
+}
+//void InterpreterMacroAssembler::push_f(XMMRegister r) {
+void InterpreterMacroAssembler::push_f() {
+  //if (TaggedStackInterpreter) push(frame::TagValue);
+  //subptr(rsp, wordSize);
+  //movflt(Address(rsp, 0), r);
+	if (TaggedStackInterpreter) {
+		move(AT, frame::TagValue);
+		push(AT);
+	}// Do not schedule for no AGI! Never write beyond esp!
+	addi(SP, SP, (-1) * wordSize);
+	swc1(FSF, SP, 0 * wordSize);
+}
+
+//FIXME: should Register r be a FloatRegister?
+void InterpreterMacroAssembler::push_d(FloatRegister r) {
+//void InterpreterMacroAssembler::push_d(XMMRegister r) {
+  if (TaggedStackInterpreter) {
+    //push(frame::TagValue);
+    //subptr(rsp, 1 * wordSize);
+    //push(frame::TagValue);
+    //subptr(rsp, 1 * wordSize);
+		move(AT, frame::TagValue); 
+		push(AT);
+		addi(SP, SP, (-3) * wordSize);
+		swc1(FSF, SP, 0 * wordSize);
+		swc1(SSF, SP, 1 * wordSize);
+
+		lwc1(r, SP, 1*wordSize);
+		swc1(r, SP, 2*wordSize);
+		move(AT, frame::TagValue);
+		sw(AT, SP, 1*wordSize);
+  } else {
+    //subptr(rsp, 2 * wordSize);
+		addi(SP, SP, (-2) * wordSize);
+		swc1(FSF, SP, 0 * wordSize);
+		swc1(SSF, SP, 1 * wordSize);
+  }
+  //movdbl(Address(rsp, 0), r);
+}
+
+void InterpreterMacroAssembler::pop(TosState state) {
+  switch (state) {
+  //case atos: pop_ptr();                 break;
+		case atos: pop_ptr(V0); 						break; 
+  	case btos:
+  	case ctos:
+  	case stos:
+  //case itos: pop_i();                   break;
+		case itos: pop_i(V0);    						break;
+			   /* lw(FSR, SP, 0);
+			      addi(SP, SP, 1 * wordSize);
+			      break;*/
+  //case ltos: pop_l();                   break;
+  //case ftos: pop_f();                   break;
+  //case dtos: pop_d();                   break;
+		//case ltos: pop_l(V0,V1);   					break;
+		case ltos: pop_l(V0);   						break;
+			   /* lw(FSR, SP, 0 * wordSize);
+			      lw(SSR, SP, 1 * wordSize);
+			      addi(SP, SP, 2 * wordSize);
+			      break;
+			      */ 
+		case ftos: pop_f();      						break;
+			   /* lwc1(FSF, SP, 0 * wordSize);
+			      addi(SP, SP, 1 * wordSize);
+			      break;
+			      */ 
+		case dtos: pop_d();      						break; 
+			   /* lwc1(FSF, SP, 0 * wordSize);
+			      lwc1(SSF, SP, 1 * wordSize);
+			      addi(SP, SP, 2 * wordSize);
+			      break;
+			      */ 
+  	case vtos: /* nothing to do */      break;
+  	default:   ShouldNotReachHere();
+  }
+	verify_oop(V0, state);
+}
+
+//FSR=V0,SSR=V1
+void InterpreterMacroAssembler::push(TosState state) {
+#if 0
+	verify_oop(rax, state);
+  switch (state) {
+  case atos: push_ptr();                break;
+  case btos:
+  case ctos:
+  case stos:
+  case itos: push_i();                  break;
+  case ltos: push_l();                  break;
+  case ftos: push_f();                  break;
+  case dtos: push_d();                  break;
+  case vtos: /* nothing to do */        break;
+  default  : ShouldNotReachHere();
+  }
+#endif
+	verify_oop(V0, state);
+	switch (state) {
+		case atos:   push(FSR);    break;
+		case btos:						     // fall through
+		case ctos:						     // fall through
+		case stos:						     // fall through
+		case itos:
+			     push_i(FSR);
+			     //addi(SP, SP, (-1) * wordSize);
+			     //sw(A0, SP, 0 * wordSize);
+			     break;
+		case ltos:
+			     addi(SP, SP, (-2) * wordSize);
+			     sw(SSR, SP, 1 * wordSize);
+			     sw(FSR, SP, 0 * wordSize);
+			        
+			     //push_l(FSR,SSR); 
+			     break;
+		case ftos: 
+			     /* addi(SP, SP, (-1) * wordSize);
+							swc1(FSF, SP, 0 * wordSize);
+					 */ 
+			     push_f(); 
+			     break;
+		case dtos: 
+			     /*  addi(SP, SP, (-2) * wordSize);
+				 swc1(FSF, SP, 0 * wordSize);
+				 swc1(SSF, SP, 1 * wordSize);
+				 */ 
+			     //FIXME, I have no idea which register to use 
+			     push_d(FSF); 
+			     break;
+		case vtos: /* nothing to do */                            break;
+		default  : ShouldNotReachHere();
+	}
+}
+
+
+
+
+// Tagged stack helpers for swap and dup
+void InterpreterMacroAssembler::load_ptr_and_tag(int n, Register val,
+                                                 Register tag) {
+  //movptr(val, Address(rsp, Interpreter::expr_offset_in_bytes(n)));
+	lw(val, SP, Interpreter::expr_offset_in_bytes(n));
+  if (TaggedStackInterpreter) {
+    //movptr(tag, Address(rsp, Interpreter::expr_tag_offset_in_bytes(n)));
+		lw(tag, SP, Interpreter::expr_tag_offset_in_bytes(n));
+  }
+}
+
+void InterpreterMacroAssembler::store_ptr_and_tag(int n, Register val,
+                                                  Register tag) {
+  //movptr(Address(rsp, Interpreter::expr_offset_in_bytes(n)), val);
+	sw( val,SP, Interpreter::expr_offset_in_bytes(n));
+  if (TaggedStackInterpreter) {
+    //movptr(Address(rsp, Interpreter::expr_tag_offset_in_bytes(n)), tag);
+		sw(tag, SP, Interpreter::expr_tag_offset_in_bytes(n));
+  }
+}
+
+
+// Tagged local support
+//LVP=S7, local variable pointer register , FIXME
+void InterpreterMacroAssembler::tag_local(frame::Tag tag, int n) {
+  if (TaggedStackInterpreter) {
+    if (tag == frame::TagCategory2) {
+      //movptr(Address(r14, Interpreter::local_tag_offset_in_bytes(n+1)),
+      //     (int32_t)frame::TagValue);
+			move(AT, (int)frame::TagValue); 
+			sw(AT,LVP, Interpreter::local_tag_offset_in_bytes(n+1));
+      //movptr(Address(r14, Interpreter::local_tag_offset_in_bytes(n)),
+      //     (int32_t)frame::TagValue);
+			sw(AT,LVP, Interpreter::local_tag_offset_in_bytes(n));
+    } else {
+      //movptr(Address(r14, Interpreter::local_tag_offset_in_bytes(n)), (int32_t)tag);
+			move(AT, (int)tag);	   
+			sw(AT,LVP, Interpreter::local_tag_offset_in_bytes(n));
+    }
+  }
+}
+
+void InterpreterMacroAssembler::tag_local(frame::Tag tag, Register idx) {
+  if (TaggedStackInterpreter) {
+    if (tag == frame::TagCategory2) {
+      //movptr(Address(r14, idx, Address::times_8,
+      //            Interpreter::local_tag_offset_in_bytes(1)), (int32_t)frame::TagValue);
+      //movptr(Address(r14, idx, Address::times_8,
+      //            Interpreter::local_tag_offset_in_bytes(0)), (int32_t)frame::TagValue);
+			// scale the index once; both tag words are stored from the same base
+			shl(idx, 3); 
+			add(idx,LVP,idx); 
+			move(AT,(int)frame::TagValue); 
+			sw(AT, idx, Interpreter::local_tag_offset_in_bytes(1));	    
+			sw(AT, idx, Interpreter::local_tag_offset_in_bytes(0));	    
+    } else {
+      //movptr(Address(r14, idx, Address::times_8, Interpreter::local_tag_offset_in_bytes(0)),
+      //     (int32_t)tag);
+			shl(idx, 3); 
+			add(idx,LVP,idx); 
+			move(AT,(int)tag); 
+			sw(AT, idx, Interpreter::local_tag_offset_in_bytes(0));	    
+    }
+  }
+}
+
+void InterpreterMacroAssembler::tag_local(Register tag, Register idx) {
+  if (TaggedStackInterpreter) {
+    // can only be TagValue or TagReference
+    //movptr(Address(r14, idx, Address::times_8, Interpreter::local_tag_offset_in_bytes(0)), tag);
+		shl(idx, 3); 
+		add(idx,LVP,idx); 
+		sw(tag, idx, Interpreter::local_tag_offset_in_bytes(0));	    
+  }
+}
+
+
+void InterpreterMacroAssembler::tag_local(Register tag, int n) {
+  if (TaggedStackInterpreter) {
+    // can only be TagValue or TagReference
+    //movptr(Address(r14, Interpreter::local_tag_offset_in_bytes(n)), tag);
+		sw(tag, LVP, Interpreter::local_tag_offset_in_bytes(n)); 
+  }
+}
+
+#ifdef ASSERT
+void InterpreterMacroAssembler::verify_local_tag(frame::Tag tag, int n) {
+  if (TaggedStackInterpreter) {
+     frame::Tag t = tag;
+    if (tag == frame::TagCategory2) {
+      Label nbl;
+      t = frame::TagValue;  // change to what is stored in locals
+      //cmpptr(Address(r14, Interpreter::local_tag_offset_in_bytes(n+1)), (int32_t)t);
+      //jcc(Assembler::equal, nbl);
+			lw(AT, LVP, Interpreter::local_tag_offset_in_bytes(n+1)); 
+			addi(AT,AT, -(int)t); 
+			beq(AT, ZERO, nbl); 
+			delayed()->nop(); 
+      stop("Local tag is bad for long/double");
+      bind(nbl);
+    }
+    Label notBad;
+    //cmpq(Address(r14, Interpreter::local_tag_offset_in_bytes(n)), (int32_t)t);
+    //jcc(Assembler::equal, notBad);
+		lw(AT, LVP, Interpreter::local_tag_offset_in_bytes(n)); 
+		addi(AT,AT, -(int)t); 
+		beq(AT, ZERO, notBad); 
+		delayed()->nop(); 
+    
+		// Also compare if the local value is zero, then the tag might
+    // not have been set coming from deopt.
+    //cmpptr(Address(r14, Interpreter::local_offset_in_bytes(n)), 0);
+    //jcc(Assembler::equal, notBad);
+		lw(AT, LVP, Interpreter::local_offset_in_bytes(n));  // check the local value itself, as in the x86 code above
+		beq(AT, ZERO, notBad); 
+		delayed()->nop(); 
+    stop("Local tag is bad");
+    bind(notBad);
+  }
+}
+
+void InterpreterMacroAssembler::verify_local_tag(frame::Tag tag, Register idx) {
+  if (TaggedStackInterpreter) {
+    frame::Tag t = tag;
+		// scale the index once: tagged locals are addressed at LVP + idx*8
+		shl(idx, 3);
+		add(idx, LVP, idx);
+    if (tag == frame::TagCategory2) {
+      Label nbl;
+      t = frame::TagValue;  // change to what is stored in locals
+      //cmpptr(Address(r14, idx, Address::times_8, Interpreter::local_tag_offset_in_bytes(1)), (int32_t)t);
+      //jcc(Assembler::equal, nbl);
+			lw(AT, idx, Interpreter::local_tag_offset_in_bytes(1));
+			addi(AT, AT, -(int)t);
+			beq(AT, ZERO, nbl);
+			delayed()->nop();
+      stop("Local tag is bad for long/double");
+      bind(nbl);
+    }
+    Label notBad;
+    //cmpptr(Address(r14, idx, Address::times_8, Interpreter::local_tag_offset_in_bytes(0)), (int32_t)t);
+    //jcc(Assembler::equal, notBad);
+		lw(AT, idx, Interpreter::local_tag_offset_in_bytes(0));
+		addi(AT, AT, -(int)t);
+		beq(AT, ZERO, notBad);
+		delayed()->nop();
+
+    // Also compare if the local value is zero, then the tag might
+    // not have been set coming from deopt.
+    //cmpptr(Address(r14, idx, Address::times_8, Interpreter::local_offset_in_bytes(0)), 0);
+    //jcc(Assembler::equal, notBad);
+		lw(AT, idx, Interpreter::local_offset_in_bytes(0));
+		beq(AT, ZERO, notBad);
+		delayed()->nop();
+    stop("Local tag is bad");
+    bind(notBad);
+  }
+}
+#endif // ASSERT
+
+
+void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 0);
+}
+
+
+void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
+                                                   Register arg_1) {
+	if (arg_1 != A0) move(A0, arg_1);
+	MacroAssembler::call_VM_leaf_base(entry_point, 1);
+  /*
+	if (c_rarg0 != arg_1) {
+    mov(c_rarg0, arg_1);
+  }
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+	*/
+}
+
+
+void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
+                                                   Register arg_1,
+                                                   Register arg_2) {
+/*
+	assert(c_rarg0 != arg_2, "smashed argument");
+  assert(c_rarg1 != arg_1, "smashed argument");
+  if (c_rarg0 != arg_1) {
+    mov(c_rarg0, arg_1);
+  }
+  if (c_rarg1 != arg_2) {
+    mov(c_rarg1, arg_2);
+  }
+*/
+	if (arg_1 != A0) move(A0, arg_1);
+	if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
+	MacroAssembler::call_VM_leaf_base(entry_point, 2);
+}
+
+void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
+                                                   Register arg_1,
+                                                   Register arg_2,
+                                                   Register arg_3) {
+/*  
+	assert(c_rarg0 != arg_2, "smashed argument");
+  assert(c_rarg0 != arg_3, "smashed argument");
+  assert(c_rarg1 != arg_1, "smashed argument");
+  assert(c_rarg1 != arg_3, "smashed argument");
+  assert(c_rarg2 != arg_1, "smashed argument");
+  assert(c_rarg2 != arg_2, "smashed argument");
+  if (c_rarg0 != arg_1) {
+    mov(c_rarg0, arg_1);
+  }
+  if (c_rarg1 != arg_2) {
+    mov(c_rarg1, arg_2);
+  }
+  if (c_rarg2 != arg_3) {
+    mov(c_rarg2, arg_3);
+  }
+*/
+	if (arg_1 != A0) move(A0, arg_1);
+	if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument");
+	if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument");
+  MacroAssembler::call_VM_leaf_base(entry_point, 3);
+}
+
+// Jump to the from_interpreted entry of a call unless single stepping is possible
+// in this thread, in which case we must call the i2i entry.
+void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
+  // set sender sp
+  //lea(r13, Address(rsp, wordSize));
+  // record last_sp
+  //movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), r13);
+	move(T5, SP);	
+	sw(SP,FP, frame::interpreter_frame_last_sp_offset * wordSize);
+
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled.  Check here for
+    // interp_only_mode if these events CAN be enabled.
+    get_thread(temp);
+    // interp_only is an int, on little endian it is sufficient to test the byte only
+    // Is a cmpl faster?
+    //cmpb(Address(temp, JavaThread::interp_only_mode_offset()), 0);
+    //jcc(Assembler::zero, run_compiled_code);
+		lw(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); 
+		beq(AT,ZERO, run_compiled_code); 
+		delayed()->nop(); 
+    //jmp(Address(method, methodOopDesc::interpreter_entry_offset()));
+		lw(AT, method, in_bytes(methodOopDesc::interpreter_entry_offset())); 
+		jr(AT); 
+		delayed()->nop(); 
+    bind(run_compiled_code);
+  }
+
+  //jmp(Address(method, methodOopDesc::from_interpreted_offset()));
+	lw(AT, method, in_bytes(methodOopDesc::from_interpreted_offset())); 
+	jr(AT); 
+	delayed()->nop();
+}
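The decision encoded above, as a hedged C++ sketch; goto_entry() is a hypothetical stand-in for the jr-through-AT jumps, and the methodOopDesc accessors are inferred from the offsets used.

    // jump_from_interpreted: pick the callee entry point
    if (JvmtiExport::can_post_interpreter_events() && thread->is_interp_only_mode()) {
      // JVMTI single stepping etc.: must stay interpreted, use the i2i entry
      goto_entry(method->interpreter_entry());
    } else {
      goto_entry(method->from_interpreted_entry());   // may dispatch into compiled code
    }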
+
+
+// The following two routines provide a hook so that an implementation
+// can schedule the dispatch in two parts.  amd64 does not do this.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
+  // Nothing amd64 specific to be done here
+}
+
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
+  dispatch_next(state, step);
+}
+
+// Assumes the next bytecode is in T7.
+void InterpreterMacroAssembler::dispatch_base(TosState state,
+                                              address* table,
+                                              bool verifyoop) {
+	if (VerifyActivationFrameSize) {
+		Label L;
+
+		sub(T2, FP, SP);
+		int min_frame_size = (frame::link_offset - 
+				frame::interpreter_frame_initial_sp_offset) * wordSize;
+		addi(T2, T2,- min_frame_size);
+		bgez(T2, L);
+		delayed()->nop();
+		stop("broken stack frame");
+		bind(L);
+	}
+	// FIXME: I do not know which register should pass to verify_oop
+	if (verifyoop) verify_oop(FSR, state);
+	sll(T2, T7, 2);
+	move(T3, (int)table);
+	add(T3, T3, T2);
+	lw(T3, T3, 0);
+	jr(T3);
+	delayed()->nop();
+}
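dispatch_base is the core of the dispatch loop on this port: T7 holds the already-loaded bytecode, and the four instructions after the verify index a table of code pointers. A sketch of the equivalent C++:

    // sll(T2, T7, 2); move(T3, table); add; lw; jr  ==  indirect jump through the table
    typedef void (*dispatch_entry)();
    dispatch_entry target = ((dispatch_entry*) table)[bytecode];  // 4-byte entries, bytecode in T7
    target();   // the generated code jumps (jr T3) rather than calls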
+
+void InterpreterMacroAssembler::dispatch_only(TosState state) {
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state), false);
+}
+
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
+  // load next bytecode (load before advancing BCP to prevent AGI)
+	lbu(T7, BCP, step);
+  // advance BCP
+  increment(BCP, step);
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
+  // load current bytecode
+	lbu(T7, BCP, 0);
+  dispatch_base(state, table);
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+//    If throw_monitor_exception
+//       throws IllegalMonitorStateException
+//    Else if install_monitor_exception
+//       installs IllegalMonitorStateException
+//    Else
+//       no error processing
+// used registers : T5, T6, T7, T8
+// T5 : thread, method access flags
+// T6 : monitor entry pointer
+// T7 : method, monitor top
+// T8 : unlock flag
+void InterpreterMacroAssembler::remove_activation(
+        TosState state,
+        Register ret_addr,
+        bool throw_monitor_exception,
+        bool install_monitor_exception,
+        bool notify_jvmdi) {
+	// Note: Registers V0, V1 and F0, F1 may be in use for the result
+	// check if synchronized method  
+	Label unlocked, unlock, no_unlock;
+
+	// get the value of _do_not_unlock_if_synchronized into T8
+#ifndef OPT_THREAD
+	Register thread = T5;
+	get_thread(thread); 
+#else
+	Register thread = TREG;
+#endif
+	lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+	// reset the flag
+	sb(ZERO, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); 
+	// get method access flags
+	lw(T7, FP, frame::interpreter_frame_method_offset * wordSize); 	
+	lw(T5, T7, in_bytes(methodOopDesc::access_flags_offset()));
+	andi(T5, T5, JVM_ACC_SYNCHRONIZED);
+	beq(T5, ZERO, unlocked);
+	delayed()->nop();
+
+	// Don't unlock anything if the _do_not_unlock_if_synchronized flag is set.
+	bne(T8, ZERO, no_unlock);
+	delayed()->nop();
+	// unlock monitor
+	push(state);     // save result
+
+	// BasicObjectLock will be first in list, 
+	// since this is a synchronized method. However, need
+	// to check that the object has not been unlocked by an explicit monitorexit bytecode.  
+	addiu(T6, FP, frame::interpreter_frame_initial_sp_offset * wordSize 
+			- (int)sizeof(BasicObjectLock));
+	// address of first monitor
+	lw(T5, T6, BasicObjectLock::obj_offset_in_bytes());
+	bne(T5,ZERO,unlock); 
+	delayed()->nop(); 
+	pop(state);
+	if (throw_monitor_exception) {
+		// Entry already unlocked, need to throw exception
+		// I think MIPS does not need empty_FPU_stack.
+		// Remove possible return value from the FPU stack, otherwise the stack could overflow.
+
+		empty_FPU_stack();	
+		call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+			InterpreterRuntime::throw_illegal_monitor_state_exception));
+		should_not_reach_here();
+	} else {
+		// Monitor already unlocked during a stack unroll. 
+		// If requested, install an illegal_monitor_state_exception.
+		// Continue with stack unrolling.
+		if (install_monitor_exception) {
+			// remove possible return value from FPU-stack, 
+			// otherwise stack could overflow
+			empty_FPU_stack();  		
+			call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+			  	InterpreterRuntime::new_illegal_monitor_state_exception));
+
+		}
+
+		b(unlocked);
+		delayed()->nop();
+	}
+
+	bind(unlock);  
+
+
+
+	unlock_object(T6);              
+	pop(state);
+	// Check for block-structured locking (i.e., that all locked objects have been unlocked)
+	bind(unlocked);  
+
+	// V0, V1: Might contain return value
+
+
+	// Check that all monitors are unlocked
+	{
+		Label loop, exception, entry, restart;
+		const int entry_size  = frame::interpreter_frame_monitor_size() * wordSize;
+		const Address monitor_block_top(FP, 
+			frame::interpreter_frame_monitor_block_top_offset * wordSize);
+
+		bind(restart);
+		// points to current entry, starting with top-most entry (ecx)
+		lw(T6, monitor_block_top); 
+		// points to word before bottom of monitor block (ebx)
+		addiu(T7, FP, frame::interpreter_frame_initial_sp_offset * wordSize); 
+		//  lw(AT, ZERO, 12); 
+		b(entry);
+		delayed()->nop();
+
+		// Entry already locked, need to throw exception
+		bind(exception); 
+
+		if (throw_monitor_exception) {
+			// Throw exception      
+			// remove possible return value from FPU-stack, 
+			// otherwise stack could overflow
+			empty_FPU_stack();       
+			MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+				InterpreterRuntime::throw_illegal_monitor_state_exception));
+			should_not_reach_here();
+		} else {
+			// Stack unrolling. Unlock object and install illegal_monitor_exception
+			// Unlock does not block, so don't have to worry about the frame
+			// We don't have to preserve eax, edx since we are going to 
+			// throw an exception
+			unlock_object(T6);
+			if (install_monitor_exception) {
+				empty_FPU_stack();  				
+				call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+				InterpreterRuntime::new_illegal_monitor_state_exception));
+			}
+
+			b(restart);
+			delayed()->nop();
+		}
+
+		bind(loop);
+		//    stop("before object exception"); 
+
+		lw(T5, T6, BasicObjectLock::obj_offset_in_bytes());
+		bne(T5, ZERO, exception);// check if current entry is used
+		delayed()->nop();
+
+
+		addiu(T6, T6, entry_size);// otherwise advance to next entry
+		bind(entry);
+		bne(T6, T7, loop);	// check if bottom reached
+		delayed()->nop();	// if not at bottom then check this entry
+	}        
+
+	bind(no_unlock);
+
+	// jvmpi support (jvmdi does not generate MethodExit on exception / popFrame)
+	if (notify_jvmdi) {
+		//notify_method_exit(state);              // preserve TOSCA
+		notify_method_exit(false,state,NotifyJVMTI);    // preserve TOSCA
+	} else {
+		// notify_jvmpi_method_exit(state);       // preserve TOSCA
+		notify_method_exit(false,state,SkipNotifyJVMTI);// preserve TOSCA
+	}
+       
+	// remove activation
+	lw(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); 
+	//lw(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize);
+	lw(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize);
+	lw(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize);
+}
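The throw_monitor_exception / install_monitor_exception policy spelled out in the header comment reduces to the following sketch; the helper names are hypothetical.

    // Synchronized method: its BasicObjectLock is the first monitor slot.
    if (is_synchronized && !do_not_unlock_if_synchronized) {
      if (first_monitor->obj() != NULL) {
        unlock_object(first_monitor);                     // normal case
      } else if (throw_monitor_exception) {
        throw_illegal_monitor_state_exception();          // already unlocked by monitorexit
      } else if (install_monitor_exception) {
        install_illegal_monitor_state_exception();        // record it, keep unwinding
      }
    }
    // Any remaining monitor that still holds an object violates block-structured
    // locking and gets the same throw-or-unlock-and-install treatment.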
+
+#endif // C_INTERP
+
+// Lock object
+//
+// Args:
+//      c_rarg1: BasicObjectLock to be used for locking
+//
+// Kills:
+//      rax
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
+//      rscratch1, rscratch2 (scratch regs)
+void InterpreterMacroAssembler::lock_object(Register lock_reg) {
+	assert(lock_reg == T6, "The argument is only for looks. It must be T6");
+
+	if (UseHeavyMonitors) {
+		call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), 
+				lock_reg);
+	} else {
+
+		Label done;
+
+		const Register swap_reg = T2;  // Must use eax for cmpxchg instruction
+		const Register obj_reg  = T4;  // Will contain the oop
+
+		const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+		const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
+		const int mark_offset = lock_offset 
+			+ BasicLock::displaced_header_offset_in_bytes(); 
+
+		Label slow_case;
+		// Load object pointer into obj_reg %ecx
+		lw(obj_reg, lock_reg, obj_offset);
+		if (UseBiasedLocking) {
+			// Note: we use noreg for the temporary register since it's hard
+			// to come up with a free register on all incoming code paths
+			biased_locking_enter(lock_reg, obj_reg, swap_reg, noreg, false, 
+					done, &slow_case);
+		}
+
+
+		// Load (object->mark() | 1) into swap_reg %eax
+		lw(T5, obj_reg, 0);
+		ori(swap_reg, T5, 1);
+/////////////////////////////////////////////	
+/*		//jerome_for_debug
+		Label ne;	
+		push(lock_reg);
+		move(AT, 0x00000005);
+		sub(AT, AT,swap_reg);
+		bne(AT, ZERO, ne);
+		delayed()->nop();	
+	     // jerome_for_debug
+		move(AT, (int)(&jerome10)); 
+		sw(swap_reg, AT, 0); 
+	    	lw(lock_reg,obj_reg, lock_offset); 
+		move(AT, (int)(&jerome9)); 
+		sw(lock_reg, AT, 0); 
+		// stop("swap reg got a error value");	
+		bind(ne);
+		pop(lock_reg);
+*/
+		//////////////////////////////////////////////	
+		
+		
+		// Save (object->mark() | 1) into BasicLock's displaced header
+		sw(swap_reg, lock_reg, mark_offset);
+
+		assert(lock_offset == 0, "displaced header must be first word in BasicObjectLock");
+		if (os::is_MP()) {
+			//  lock();
+		}    
+		cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
+
+		if (PrintBiasedLockingStatistics) {
+			//cond_incl(AT, Address((int) BiasedLocking::fast_path_entry_count_addr(), relocInfo::none));
+		}
+
+		bne(AT, ZERO, done);
+
+		// Test if the oopMark is an obvious stack pointer, i.e.,
+		//  1) (mark & 3) == 0, and
+		//  2) SP <= mark < SP + os::pagesize()
+		//
+		// These 3 tests can be done by evaluating the following 
+		// expression: ((mark - esp) & (3 - os::vm_page_size())),
+		// assuming both stack pointer and pagesize have their
+		// least significant 2 bits clear.
+		// NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg
+		delayed()->nop();
+
+		sub(swap_reg, swap_reg, SP);
+		move(AT, 3 - os::vm_page_size());
+		andr(swap_reg, swap_reg, AT);
+		// Save the test result, for recursive case, the result is zero
+		sw(swap_reg, lock_reg, mark_offset);
+		if (PrintBiasedLockingStatistics) {
+		// cond_incl(AT, Address((int) BiasedLocking::fast_path_entry_count_addr(), relocInfo::none));
+		}
+
+		beq(swap_reg, ZERO, done);
+		delayed()->nop();
+		bind(slow_case);
+		// Call the runtime routine for slow case
+		call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg);
+
+		bind(done);
+	}   
+}
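The fast path above is the usual HotSpot stack-locking protocol; roughly, and with the mark-word details simplified (cas() stands in for the cmpxchg macro):

    intptr_t mark = obj->mark_word() | 1;               // ori(swap_reg, T5, 1): unlocked pattern
    lock->set_displaced_header(mark);                   // sw(swap_reg, lock_reg, mark_offset)
    if (cas(obj->mark_addr(), mark, (intptr_t) lock)) {
      // fast path: the mark word now points at our BasicLock
    } else if (((mark - SP) & (3 - os::vm_page_size())) == 0) {
      // the mark already points into this thread's stack page: recursive lock;
      // the zero test result stored as the displaced header records that
      lock->set_displaced_header(0);
    } else {
      InterpreterRuntime::monitorenter(thread, lock);   // slow path
    }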
+
+
+// Unlocks an object. Used in monitorexit bytecode and
+// remove_activation.  Throws an IllegalMonitorException if object is
+// not locked by current thread.
+//
+// Args:
+//      c_rarg1: BasicObjectLock for lock
+//
+// Kills:
+//      rax
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs)
+//      rscratch1, rscratch2 (scratch regs)
+// Argument: T6 : Points to BasicObjectLock structure for lock
+// Throw an IllegalMonitorException if object is not locked by current thread
+void InterpreterMacroAssembler::unlock_object(Register lock_reg) {
+	assert(lock_reg == T6, "The argument is only for looks. It must be T6");
+
+	if (UseHeavyMonitors) {
+		call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg);
+	} else {
+		Label done;
+
+		const Register swap_reg   = T2;  // Must use eax for cmpxchg instruction
+		const Register header_reg = T7;  // Will contain the old oopMark
+		const Register obj_reg    = T4;  // Will contain the oop
+
+		save_bcp(); // Save in case of exception
+
+		// Convert from BasicObjectLock structure to object and BasicLock structure
+		// Store the BasicLock address into %eax
+		addi(swap_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes());
+
+		// Load oop into obj_reg(%ecx)
+		lw(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes ());
+		//free entry 
+		// movl(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), NULL_WORD);
+		sw(ZERO,lock_reg, BasicObjectLock::obj_offset_in_bytes());
+		if (UseBiasedLocking) {
+			biased_locking_exit(obj_reg, header_reg, done);
+		}
+
+
+
+		// Load the old header from BasicLock structure
+		lw(header_reg, swap_reg, BasicLock::displaced_header_offset_in_bytes());
+		/*
+		// Free entry
+		sw(ZERO, lock_reg, BasicObjectLock::obj_offset_in_bytes());
+		*/
+		// zero for recursive case
+		beq(header_reg, ZERO, done);
+		delayed()->nop();
+
+		// Atomic swap back the old header
+		if (os::is_MP()); //lock();
+		cmpxchg(header_reg, Address(obj_reg, 0), swap_reg);
+
+		// zero for recursive case
+		bne(AT, ZERO, done);
+		delayed()->nop();
+
+		// Call the runtime routine for slow case.
+		sw(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj
+		call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), 
+				lock_reg);
+
+		bind(done);
+
+		restore_bcp();
+	}
+}
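unlock_object reverses that protocol; a sketch under the same caveats:

    lock->set_obj(NULL);                               // free the monitor slot first
    intptr_t displaced = lock->displaced_header();     // lw(header_reg, swap_reg, ...)
    if (displaced != 0 &&                              // zero means a recursive unlock
        !cas(obj->mark_addr(), (intptr_t) lock, displaced)) {
      // the header changed under us (e.g. the lock was inflated):
      // restore obj and let the runtime sort it out
      lock->set_obj(obj);
      InterpreterRuntime::monitorexit(thread, lock);
    }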
+
+#ifndef CC_INTERP
+
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp,
+                                                         Label& zero_continue) {
+	assert(ProfileInterpreter, "must be profiling interpreter");
+	lw(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize));
+	beq(mdp, ZERO, zero_continue);
+	delayed()->nop();
+}
+
+
+// Set the method data pointer for the current bcp.
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+	assert(ProfileInterpreter, "must be profiling interpreter");
+	Label zero_continue;
+	///  pushl(eax);
+	///  pushl(ebx);
+	sw(V0, SP, (-1) * wordSize);
+	sw(T0, SP, (-2) * wordSize);
+	addiu(SP, SP, (-2) * wordSize);
+
+	get_method(T0);
+	// Test MDO to avoid the call if it is NULL.
+	///  movl(eax, Address(ebx, in_bytes(methodOopDesc::method_data_offset())));
+	lw(V0, T0, in_bytes(methodOopDesc::method_data_offset()));
+	///  testl(eax, eax);
+	///  jcc(Assembler::zero, zero_continue);
+	beq(V0, ZERO, zero_continue);
+	delayed()->nop();
+
+	// ebx: method T0
+	// esi: bcp BCP
+	call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP);
+	// eax: mdi V0
+
+	///  movl(ebx, Address(ebx, in_bytes(methodOopDesc::method_data_offset())));
+	lw(T0, T0, in_bytes(methodOopDesc::method_data_offset()));
+	///  testl(ebx, ebx);
+	///  jcc(Assembler::zero, zero_continue);
+	beq(T0, ZERO, zero_continue);
+	delayed()->nop();
+	///  addl(ebx, in_bytes(methodDataOopDesc::data_offset()));
+	///  addl(ebx, eax);
+	addiu(T0, T0, in_bytes(methodDataOopDesc::data_offset()));
+	add(T0, T0, V0);
+
+	///  movl(Address(ebp, frame::interpreter_frame_mdx_offset * wordSize), ebx);
+	sw(T0, FP, frame::interpreter_frame_mdx_offset * wordSize);
+
+	bind(zero_continue);
+	///  popl(ebx);
+	///  popl(eax);
+	addiu(SP, SP, 2 * wordSize);
+	lw(V0, SP, (-1) * wordSize);
+	lw(T0, SP, (-2) * wordSize);
+}
+
+void InterpreterMacroAssembler::verify_method_data_pointer() {
+#if 0
+	assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+  Label verify_continue;
+  push(rax);
+  push(rbx);
+  push(c_rarg3);
+  push(c_rarg2);
+  test_method_data_pointer(c_rarg3, verify_continue); // If mdp is zero, continue
+  get_method(rbx);
+
+  // If the mdp is valid, it will point to a DataLayout header which is
+  // consistent with the bcp.  The converse is highly probable also.
+  load_unsigned_word(c_rarg2,
+                     Address(c_rarg3, in_bytes(DataLayout::bci_offset())));
+  addptr(c_rarg2, Address(rbx, methodOopDesc::const_offset()));
+  lea(c_rarg2, Address(c_rarg2, constMethodOopDesc::codes_offset()));
+  cmpptr(c_rarg2, r13);
+  jcc(Assembler::equal, verify_continue);
+  // rbx: method
+  // r13: bcp
+  // c_rarg3: mdp
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp),
+               rbx, r13, c_rarg3);
+  bind(verify_continue);
+  pop(c_rarg2);
+  pop(c_rarg3);
+  pop(rbx);
+  pop(rax);
+#endif // ASSERT
+#endif
+}
+
+
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
+                                                int constant,
+                                                Register value) {
+  //assert(ProfileInterpreter, "must be profiling interpreter");
+  //Address data(mdp_in, constant);
+  //movptr(data, value);
+}
+
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      int constant,
+                                                      bool decrement) {
+  // Counter address
+  //Address data(mdp_in, constant);
+
+  //increment_mdp_data_at(data, decrement);
+}
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Address data,
+                                                      bool decrement) {
+/*
+	assert(ProfileInterpreter, "must be profiling interpreter");
+  // %%% this does 64bit counters at best it is wasting space
+  // at worst it is a rare bug when counters overflow
+
+  if (decrement) {
+    // Decrement the register.  Set condition codes.
+    addptr(data, (int32_t) -DataLayout::counter_increment);
+    // If the decrement causes the counter to overflow, stay negative
+    Label L;
+    jcc(Assembler::negative, L);
+    addptr(data, (int32_t) DataLayout::counter_increment);
+    bind(L);
+  } else {
+    assert(DataLayout::counter_increment == 1,
+           "flow-free idiom only works with 1");
+    // Increment the register.  Set carry flag.
+    addptr(data, DataLayout::counter_increment);
+    // If the increment causes the counter to overflow, pull back by 1.
+    sbbptr(data, (int32_t)0);
+  }
+*/
+}
+
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      Register reg,
+                                                      int constant,
+                                                      bool decrement) {
+  //Address data(mdp_in, reg, Address::times_1, constant);
+
+  //increment_mdp_data_at(data, decrement);
+}
+
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
+                                                int flag_byte_constant) {
+/*
+	assert(ProfileInterpreter, "must be profiling interpreter");
+  int header_offset = in_bytes(DataLayout::header_offset());
+  int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant);
+  // Set the flag
+  orl(Address(mdp_in, header_offset), header_bits);
+*/
+}
+
+
+
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
+                                                 int offset,
+                                                 Register value,
+                                                 Register test_value_out,
+                                                 Label& not_equal_continue) {
+/*  
+	assert(ProfileInterpreter, "must be profiling interpreter");
+  if (test_value_out == noreg) {
+    cmpptr(value, Address(mdp_in, offset));
+  } else {
+    // Put the test value into a register, so caller can use it:
+    movptr(test_value_out, Address(mdp_in, offset));
+    cmpptr(test_value_out, value);
+  }
+  jcc(Assembler::notEqual, not_equal_continue);
+*/
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     int offset_of_disp) {
+/*
+	assert(ProfileInterpreter, "must be profiling interpreter");
+  Address disp_address(mdp_in, offset_of_disp);
+  addptr(mdp_in, disp_address);
+  movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), mdp_in);
+*/
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     Register reg,
+                                                     int offset_of_disp) {
+/*
+	assert(ProfileInterpreter, "must be profiling interpreter");
+  Address disp_address(mdp_in, reg, Address::times_1, offset_of_disp);
+  addptr(mdp_in, disp_address);
+  movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), mdp_in);
+*/
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in,
+                                                       int constant) {
+/*
+	assert(ProfileInterpreter, "must be profiling interpreter");
+  addptr(mdp_in, constant);
+  movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize), mdp_in);
+*/
+}
+
+
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
+/*
+	assert(ProfileInterpreter, "must be profiling interpreter");
+  push(return_bci); // save/restore across call_VM
+  call_VM(noreg,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
+          return_bci);
+  pop(return_bci);
+*/
+}
+
+
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
+                                                     Register bumped_count) {
+/*
+	if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    // Otherwise, assign to mdp
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch.  Increment the taken count.
+    // We inline increment_mdp_data_at to return bumped_count in a register
+    //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
+    Address data(mdp, in_bytes(JumpData::taken_offset()));
+    movptr(bumped_count, data);
+    assert(DataLayout::counter_increment == 1,
+            "flow-free idiom only works with 1");
+    addptr(bumped_count, DataLayout::counter_increment);
+    sbbptr(bumped_count, 0);
+    movptr(data, bumped_count); // Store back out
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
+    bind(profile_continue);
+  }
+*/
+}
+
+
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+/*
+	if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch.  Increment the not taken count.
+    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
+
+    // The method data pointer needs to be updated to correspond to
+    // the next bytecode
+    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+    bind(profile_continue);
+  }
+*/
+}
+
+
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+/*
+	if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call.  Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+    bind(profile_continue);
+  }
+*/
+}
+
+
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
+/*
+	if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call.  Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp,
+                           in_bytes(VirtualCallData::
+                                    virtual_call_data_size()));
+    bind(profile_continue);
+  }
+*/
+}
+
+
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
+                                                     Register mdp,
+                                                     Register reg2) {
+/*
+	if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call.  Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // Record the receiver type.
+    record_klass_in_profile(receiver, mdp, reg2);
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp,
+                           in_bytes(VirtualCallData::
+                                    virtual_call_data_size()));
+    bind(profile_continue);
+  }
+*/
+}
+
+void InterpreterMacroAssembler::profile_checkcast(bool is_null, Register mdp) {
+#if 0
+#ifndef CORE
+	if (ProfileInterpreter) {
+		Label profile_continue;
+
+		// If no method data exists, go to profile_continue.
+		test_method_data_pointer(mdp, profile_continue);
+
+		if (is_null)                // Set the flag to true.
+			set_mdp_flag_at(mdp, BitData::null_flag_constant());
+
+		// The method data pointer needs to be updated.
+		update_mdp_by_constant(mdp, in_bytes(BitData::bit_data_size()));
+
+		bind (profile_continue);
+	}
+#endif // !CORE
+#endif
+}
+
+// This routine creates a state machine for updating the multi-row
+// type profile at a virtual call site (or other type-sensitive bytecode).
+// The machine visits each row (of receiver/count) until the receiver type
+// is found, or until it runs out of rows.  At the same time, it remembers
+// the location of the first empty row.  (An empty row records null for its
+// receiver, and can be allocated for a newly-observed receiver type.)
+// Because there are two degrees of freedom in the state, a simple linear
+// search will not work; it must be a decision tree.  Hence this helper
+// function is recursive, to generate the required tree structured code.
+// It's the interpreter, so we are trading off code space for speed.
+// See below for example code.
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
+                                        Register receiver, Register mdp,
+                                        Register reg2,
+                                        int start_row, Label& done) {
+#if 0
+	int last_row = VirtualCallData::row_limit() - 1;
+  assert(start_row <= last_row, "must be work left to do");
+  // Test this row for both the receiver and for null.
+  // Take any of three different outcomes:
+  //   1. found receiver => increment count and goto done
+  //   2. found null => keep looking for case 1, maybe allocate this cell
+  //   3. found something else => keep looking for cases 1 and 2
+  // Case 3 is handled by a recursive call.
+  for (int row = start_row; row <= last_row; row++) {
+    Label next_test;
+    bool test_for_null_also = (row == start_row);
+
+    // See if the receiver is receiver[n].
+    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
+    test_mdp_data_at(mdp, recvr_offset, receiver,
+                     (test_for_null_also ? reg2 : noreg),
+                     next_test);
+    // (Reg2 now contains the receiver from the CallData.)
+
+    // The receiver is receiver[n].  Increment count[n].
+    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+    increment_mdp_data_at(mdp, count_offset);
+    jmp(done);
+    bind(next_test);
+
+    if (test_for_null_also) {
+      // Failed the equality check on receiver[n]...  Test for null.
+      testptr(reg2, reg2);
+      if (start_row == last_row) {
+        // The only thing left to do is handle the null case.
+        jcc(Assembler::notZero, done);
+        break;
+      }
+      // Since null is rare, make it be the branch-taken case.
+      Label found_null;
+      jcc(Assembler::zero, found_null);
+
+      // Put all the "Case 3" tests here.
+      record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done);
+
+      // Found a null.  Keep searching for a matching receiver,
+      // but remember that this is an empty (unused) slot.
+      bind(found_null);
+    }
+  }
+
+  // In the fall-through case, we found no matching receiver, but we
+  // observed the receiver[start_row] is NULL.
+
+  // Fill in the receiver field and increment the count.
+  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
+  set_mdp_data_at(mdp, recvr_offset, receiver);
+  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+  movl(reg2, DataLayout::counter_increment);
+  set_mdp_data_at(mdp, count_offset, reg2);
+  jmp(done);
+#endif
+}
+
+// Example state machine code for three profile rows:
+//   // main copy of decision tree, rooted at row[1]
+//   if (row[0].rec == rec) { row[0].incr(); goto done; }
+//   if (row[0].rec != NULL) {
+//     // inner copy of decision tree, rooted at row[1]
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[1].rec != NULL) {
+//       // degenerate decision tree, rooted at row[2]
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       if (row[2].rec != NULL) { goto done; } // overflow
+//       row[2].init(rec); goto done;
+//     } else {
+//       // remember row[1] is empty
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       row[1].init(rec); goto done;
+//     }
+//   } else {
+//     // remember row[0] is empty
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[2].rec == rec) { row[2].incr(); goto done; }
+//     row[0].init(rec); goto done;
+//   }
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
+                                                        Register mdp,
+                                                        Register reg2) {
+#if 0
+  assert(ProfileInterpreter, "must be profiling");
+  Label done;
+
+  record_klass_in_profile_helper(receiver, mdp, reg2, 0, done);
+
+  bind (done);
+#endif
+}
+
+void InterpreterMacroAssembler::profile_ret(Register return_bci,
+                                            Register mdp) {
+#if 0
+		if (ProfileInterpreter) {
+    Label profile_continue;
+    uint row;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Update the total ret count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    for (row = 0; row < RetData::row_limit(); row++) {
+      Label next_test;
+
+      // See if return_bci is equal to bci[n]:
+      test_mdp_data_at(mdp,
+                       in_bytes(RetData::bci_offset(row)),
+                       return_bci, noreg,
+                       next_test);
+
+      // return_bci is equal to bci[n].  Increment the count.
+      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));
+
+      // The method data pointer needs to be updated to reflect the new target.
+      update_mdp_by_offset(mdp,
+                           in_bytes(RetData::bci_displacement_offset(row)));
+      jmp(profile_continue);
+      bind(next_test);
+    }
+
+    update_mdp_for_ret(return_bci);
+
+    bind(profile_continue);
+  }
+#endif
+}
+
+
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
+#if 0
+	if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+    }
+    update_mdp_by_constant(mdp, mdp_delta);
+
+    bind(profile_continue);
+  }
+#endif
+}
+
+
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
+#if 0
+  if (ProfileInterpreter && TypeProfileCasts) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    int count_offset = in_bytes(CounterData::count_offset());
+    // Back up the address, since we have already bumped the mdp.
+    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+
+    // *Decrement* the counter.  We expect to see zero or small negatives.
+    increment_mdp_data_at(mdp, count_offset, true);
+
+    bind (profile_continue);
+  }
+#endif
+}
+
+
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
+#if 0
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+
+      // Record the object type.
+      record_klass_in_profile(klass, mdp, reg2);
+    }
+    update_mdp_by_constant(mdp, mdp_delta);
+
+    bind(profile_continue);
+  }
+#endif
+}
+
+
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
+#if 0
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Update the default case count
+    increment_mdp_data_at(mdp,
+                          in_bytes(MultiBranchData::default_count_offset()));
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(mdp,
+                         in_bytes(MultiBranchData::
+                                  default_displacement_offset()));
+
+    bind(profile_continue);
+  }
+#endif
+}
+
+
+void InterpreterMacroAssembler::profile_switch_case(Register index,
+                                                    Register mdp,
+                                                    Register reg2) {
+#if 0
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Build the base (index * per_case_size_in_bytes()) +
+    // case_array_offset_in_bytes()
+    movl(reg2, in_bytes(MultiBranchData::per_case_size()));
+    imulptr(index, reg2); // XXX l ?
+    addptr(index, in_bytes(MultiBranchData::case_array_offset())); // XXX l ?
+
+    // Update the case count
+    increment_mdp_data_at(mdp,
+                          index,
+                          in_bytes(MultiBranchData::relative_count_offset()));
+
+    // The method data pointer needs to be updated.
+    update_mdp_by_offset(mdp,
+                         index,
+                         in_bytes(MultiBranchData::
+                                  relative_displacement_offset()));
+
+    bind(profile_continue);
+  }
+#endif
+}
+
+
+
+void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
+  if (state == atos) {
+    MacroAssembler::verify_oop(reg);
+  }
+}
+
+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) {
+  // only if +VerifyFPU  && (state == ftos || state == dtos)
+  // For now, do nothing.
+}
+#endif // !CC_INTERP
+
+
+//FIXME, aoqi:see UltraViolet
+void InterpreterMacroAssembler::notify_method_entry() {
+  // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
+  // track stack depth.  If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  //Register tempreg = Rscratch0;
+  Register tempreg = T0;
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label L;
+    //movl(rdx, Address(r15_thread, JavaThread::interp_only_mode_offset()));
+    //testl(rdx, rdx);
+    //jcc(Assembler::zero, L);
+    //lw(tempreg, in_bytes(JavaThread::interp_only_mode_offset()), Rthread);
+    lw(tempreg, S7, in_bytes(JavaThread::interp_only_mode_offset()));
+    beq(tempreg, ZERO, L);
+    delayed()->nop();
+    call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                    InterpreterRuntime::post_method_entry));
+    bind(L);
+  }
+
+  {
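+    // SkipIfEqual tests the DTraceMethodProbes flag and branches over the
+    // probe call below when the flag is 0 (assumed behaviour of the shared
+    // SkipIfEqual helper).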
+    //SkipIfEqual skip_if(this, tempreg, ZERO, &DTraceMethodProbes, 1);
+    SkipIfEqual skip_if(this, &DTraceMethodProbes, 0);
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+                 S7,    // Rthread
+                 S1);   // Rmethod
+  }
+
+}
+
+//FIXME, aoqi:see UltraViolet
+void InterpreterMacroAssembler::notify_method_exit(
+    //TosState state, NotifyMethodExitMode mode) {
+    bool is_native_method, TosState state, NotifyMethodExitMode mode) {
+  // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
+  // track stack depth.  If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  //Register tempreg = Rscratch0;
+  Register tempreg = T0;
+  if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
+    Label skip;
+    //lw(tempreg, in_bytes(JavaThread::interp_only_mode_offset()), Rthread);
+    lw(tempreg, S7, in_bytes(JavaThread::interp_only_mode_offset()));
+    beq(tempreg, ZERO, skip);
+    delayed()->nop();
+    // Note: frame::interpreter_frame_result has a dependency on how the
+    // method result is saved across the call to post_method_exit. If this
+    // is changed then the interpreter_frame_result implementation will
+    // need to be updated too.
+
+    // For c++ interpreter the result is always stored at a known location in the frame
+    // template interpreter will leave it on the top of the stack.
+    save_return_value(state, is_native_method);
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+    restore_return_value(state, is_native_method);
+    bind(skip);
+  }
+
+  {
+    // Dtrace notification
+    //SkipIfEqual skip_if(this, tempreg, R0, &DTraceMethodProbes, equal);
+    SkipIfEqual skip_if(this, &DTraceMethodProbes, 0);
+    save_return_value(state, is_native_method);
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+                 S7, S1);   // Rthread, Rmethod
+    restore_return_value(state, is_native_method);
+  }
+}
+
+//FIXME  yyq native return 64 bits
+void InterpreterMacroAssembler::save_return_value(
+    TosState state, bool is_native_call) {
+  if (is_native_call) {
+    // save any potential method result value
+    //sd(V0, frame::interpreter_frame_l_scratch_offset * wordSize, FP);
+    //sdc1(F0, frame::interpreter_frame_d_scratch_offset * wordSize, FP);
+    sw(V0, FP, (-9) * wordSize);
+    swc1(F0, FP, (-10) * wordSize);
+
+//    sd(V0, FP, (-9) * wordSize);
+//    sdc1(F0, FP, (-10) * wordSize);
+  } else {
+    push(state);
+  }
+}
+
+//FIXME  yyq native return 64 bits
+void InterpreterMacroAssembler::restore_return_value(
+    TosState state, bool is_native_call) {
+  if (is_native_call) {
+    // Restore any method result value
+    //ld(V0, frame::interpreter_frame_l_scratch_offset * wordSize, FP);
+    //ldc1(F0, frame::interpreter_frame_d_scratch_offset * wordSize, FP);
+    lw(V0, FP, (-9) * wordSize);
+    lwc1(F0, FP, (-10) * wordSize);
+  } else {
+    pop(state);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/interp_masm_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,281 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This file specializes the assembler with interpreter-specific macros
+
+
+class InterpreterMacroAssembler: public MacroAssembler {
+#ifndef CC_INTERP
+ protected:
+  // Interpreter specific version of call_VM_base
+  virtual void call_VM_leaf_base(address entry_point,
+                                 int number_of_arguments);
+
+  virtual void call_VM_base(Register oop_result,
+                            Register java_thread,
+                            Register last_java_sp,
+                            address  entry_point,
+                            int number_of_arguments,
+                            bool check_exceptions);
+
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  // base routine for all dispatches
+  void dispatch_base(TosState state, address* table, bool verifyoop = true);
+#endif // CC_INTERP
+
+ public:
+  InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
+
+  void load_earlyret_value(TosState state);
+
+#ifdef CC_INTERP
+  void save_bcp()                                          { /*  not needed in c++ interpreter and harmless */ }
+  void restore_bcp()                                       { /*  not needed in c++ interpreter and harmless */ }
+
+  // Helpers for runtime call arguments/results
+  void get_method(Register reg);
+
+#else
+
+  // Interpreter-specific registers
+  void save_bcp() {
+    //movptr(Address(rbp, frame::interpreter_frame_bcx_offset * wordSize), r13);
+    sw(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize);
+  }
+
+  void restore_bcp() {
+    //movptr(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize));
+    lw(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize);
+  }
+
+  void restore_locals() {
+    //movptr(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize));
+    lw(LVP, FP, frame::interpreter_frame_locals_offset * wordSize);
+  }
+
+  // Helpers for runtime call arguments/results
+  void get_method(Register reg) {
+    //movptr(reg, Address(rbp, frame::interpreter_frame_method_offset * wordSize));
+    lw(reg, FP, frame::interpreter_frame_method_offset * wordSize);
+  }
+
+  void get_constant_pool(Register reg) {
+    get_method(reg);
+    //movptr(reg, Address(reg, methodOopDesc::constants_offset()));
+    lw(reg, reg, in_bytes(methodOopDesc::constants_offset()));
+  }
+
+  void get_constant_pool_cache(Register reg) {
+    get_constant_pool(reg);
+    //movptr(reg, Address(reg, constantPoolOopDesc::cache_offset_in_bytes()));
+    lw(reg, reg, constantPoolOopDesc::cache_offset_in_bytes());
+  }
+
+  void get_cpool_and_tags(Register cpool, Register tags) {
+    get_constant_pool(cpool);
+    //movptr(tags, Address(cpool, constantPoolOopDesc::tags_offset_in_bytes()));
+    lw(tags, cpool, constantPoolOopDesc::tags_offset_in_bytes());
+  }
+
+  void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
+  void get_cache_and_index_at_bcp(Register cache, Register index,
+                                  int bcp_offset);
+  void get_cache_entry_pointer_at_bcp(Register cache, Register tmp,
+                                      int bcp_offset);
+
+
+  void pop_ptr(Register r = V0);
+  void pop_i(Register r = V0);
+  //void pop_l(Register r = V0);
+  void pop_l(Register lo = V0, Register hi = V1);
+  //void pop_f(XMMRegister r = xmm0);
+  //void pop_d(XMMRegister r = xmm0);
+  void pop_f();
+  void pop_d();
+  void push_ptr(Register r = V0);
+  void push_i(Register r = V0);
+  //void push_l(Register r = V0);
+  void push_l(Register lo = V0, Register hi = V1);
+  //void push_f(XMMRegister r = xmm0);
+  //void push_d(XMMRegister r = xmm0);
+  void push_f();
+  void push_d(FloatRegister r = F0);
+
+  void pop(Register r) { ((MacroAssembler*)this)->pop(r); }
+
+  void push(Register r) { ((MacroAssembler*)this)->push(r); }
+  //void push(int32_t imm) { ((MacroAssembler*)this)->push(imm); }
+
+  void pop_dtos_to_esp();
+  void pop_ftos_to_esp();
+
+  void pop(TosState state); // transition vtos -> state
+  void push(TosState state); // transition state -> vtos
+
+  // Tagged stack support, pop and push both tag and value.
+  void pop_ptr(Register r, Register tag);
+  void push_ptr(Register r, Register tag);
+#endif // CC_INTERP
+
+  DEBUG_ONLY(void verify_stack_tag(frame::Tag t);)
+
+#ifndef CC_INTERP
+
+  // Tagged stack helpers for swap and dup
+  void load_ptr_and_tag(int n, Register val, Register tag);
+  void store_ptr_and_tag(int n, Register val, Register tag);
+
+  // Tagged Local support
+  void tag_local(frame::Tag tag, int n);
+  void tag_local(Register tag, int n);
+  void tag_local(frame::Tag tag, Register idx);
+  void tag_local(Register tag, Register idx);
+
+#ifdef ASSERT
+  void verify_local_tag(frame::Tag tag, int n);
+  void verify_local_tag(frame::Tag tag, Register idx);
+#endif // ASSERT
+
+
+  void empty_expression_stack() {
+    lw(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+    // NULL last_sp until next java call
+    sw(ZERO, FP, frame::interpreter_frame_last_sp_offset * wordSize);
+  }
+
+  // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls
+  void super_call_VM_leaf(address entry_point);
+  void super_call_VM_leaf(address entry_point, Register arg_1);
+  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
+  void super_call_VM_leaf(address entry_point,
+                          Register arg_1, Register arg_2, Register arg_3);
+
+  // Generate a subtype check: branch to ok_is_subtype if sub_klass is
+  // a subtype of super_klass.
+  //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
+  void gen_subtype_check(Register Rsup_klass, Register sub_klass, Label &ok_is_subtype);
+
+  // Dispatching
+  void dispatch_prolog(TosState state, int step = 0);
+  void dispatch_epilog(TosState state, int step = 0);
+  // dispatch via ebx (assume ebx is loaded already)
+  void dispatch_only(TosState state);
+  // dispatch normal table via ebx (assume ebx is loaded already)
+  void dispatch_only_normal(TosState state);
+  void dispatch_only_noverify(TosState state);
+  // load ebx from [esi + step] and dispatch via ebx
+  void dispatch_next(TosState state, int step = 0);
+  // load ebx from [esi] and dispatch via ebx and table
+  void dispatch_via (TosState state, address* table);
+
+  // jump to an invoked target
+  void jump_from_interpreted(Register method, Register temp);
+
+
+  // Returning from interpreted functions
+  //
+  // Removes the current activation (incl. unlocking of monitors)
+  // and sets up the return address.  This code is also used for
+  // exception unwinding. In that case, we do not want to throw
+  // IllegalMonitorStateExceptions, since that might get us into an
+  // infinite rethrow exception loop.
+  // Additionally this code is used for popFrame and earlyReturn.
+  // In popFrame case we want to skip throwing an exception,
+  // installing an exception, and notifying jvmdi.
+  // In earlyReturn case we only want to skip throwing an exception
+  // and installing an exception.
+  void remove_activation(TosState state, Register ret_addr,
+                         bool throw_monitor_exception = true,
+                         bool install_monitor_exception = true,
+                         bool notify_jvmdi = true);
+#endif // CC_INTERP
+
+  // Object locking
+  void lock_object  (Register lock_reg);
+  void unlock_object(Register lock_reg);
+
+#ifndef CC_INTERP
+
+  // Interpreter profiling operations
+  void set_method_data_pointer_for_bcp();
+  void test_method_data_pointer(Register mdp, Label& zero_continue);
+  void verify_method_data_pointer();
+
+  void set_mdp_data_at(Register mdp_in, int constant, Register value);
+  void increment_mdp_data_at(Address data, bool decrement = false);
+  void increment_mdp_data_at(Register mdp_in, int constant,
+                             bool decrement = false);
+  void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
+                             bool decrement = false);
+  void set_mdp_flag_at(Register mdp_in, int flag_constant);
+  void test_mdp_data_at(Register mdp_in, int offset, Register value,
+                        Register test_value_out,
+                        Label& not_equal_continue);
+
+  void record_klass_in_profile(Register receiver, Register mdp,
+                               Register reg2);
+  void record_klass_in_profile_helper(Register receiver, Register mdp,
+                                      Register reg2,
+                                      int start_row, Label& done);
+
+  void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
+  void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
+  void update_mdp_by_constant(Register mdp_in, int constant);
+  void update_mdp_for_ret(Register return_bci);
+
+  void profile_taken_branch(Register mdp, Register bumped_count);
+  void profile_not_taken_branch(Register mdp);
+  void profile_call(Register mdp);
+  void profile_final_call(Register mdp);
+  void profile_virtual_call(Register receiver, Register mdp,
+                            Register scratch2);
+  void profile_ret(Register return_bci, Register mdp);
+  void profile_checkcast(bool is_null, Register mdp);
+  void profile_null_seen(Register mdp);
+  void profile_typecheck(Register mdp, Register klass, Register scratch);
+  void profile_typecheck_failed(Register mdp);
+  void profile_switch_default(Register mdp);
+  void profile_switch_case(Register index_in_scratch, Register mdp,
+                           Register scratch2);
+
+  // Debugging
+  // only if +VerifyOops && state == atos
+  void verify_oop(Register reg, TosState state = atos);
+  // only if +VerifyFPU  && (state == ftos || state == dtos)
+  void verify_FPU(int stack_depth, TosState state = ftos);
+
+#endif // !CC_INTERP
+
+  typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+
+  // support for jvmti/dtrace
+  void notify_method_entry();
+  void notify_method_exit(bool is_native_method, TosState state, NotifyMethodExitMode mode);
+  void save_return_value(TosState state, bool is_native_call);
+  void restore_return_value(TosState state, bool is_native_call);
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,42 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+
+// Generation of Interpreter
+//
+  friend class AbstractInterpreterGenerator;
+
+ private:
+
+  address generate_normal_entry(bool synchronized);
+  address generate_native_entry(bool synchronized);
+  address generate_abstract_entry(void);
+  address generate_math_entry(AbstractInterpreter::MethodKind kind);
+  address generate_empty_entry(void);
+  address generate_accessor_entry(void);
+  void lock_method(void);
+  void generate_stack_overflow_check(void);
+
+  void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
+  void generate_counter_overflow(Label* do_continue);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_interpreterRT_mips.cpp.incl"
+
+#define __ _masm->
+
+// Implementation of SignatureHandlerGenerator
+
+void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) {
+  // __ lw(temp(), from(), from_offset * wordSize);
+  __ lw(temp(), from(), Interpreter::local_offset_in_bytes(from_offset));
+  __ sw(temp(), to(), to_offset * wordSize);
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) {
+  //jerome_for_debug
+  // destroy register AT
+  //  __ addi(temp(), from(), from_offset * wordSize);
+  //  __ lw(AT, from(), from_offset * wordSize);
+  __ addi(temp(), from(), Interpreter::local_offset_in_bytes(from_offset));
+  __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset));
+
+  Label L;
+  __ bne(AT, ZERO, L);
+  __ delayed()->nop();
+  __ move(temp(), ZERO);
+  __ bind(L);
+  __ sw(temp(), to(), to_offset * wordSize);
+}
+
+// FIXME
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
+  // generate code to handle arguments
+  iterate(fingerprint);
+  // return result handler
+  __ move(V0, (int)AbstractInterpreter::result_handler(method()->result_type()));
+  // return
+  __ jr(RA);
+  __ delayed()->nop();
+
+  __ flush();
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
+  //printf("pass int\n");
+  Argument jni_arg(jni_offset());
+  __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset()));
+  //FIXME sw should be removed later
+  __ sw(temp(), to(), jni_offset() * wordSize);
+  __ store_int_argument(temp(), jni_arg);
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
+  //  box(offset(), jni_offset());
+  Argument jni_arg(jni_offset());
+
+  Register Rtmp1 = temp();
+
+  // the handle for a receiver will never be null
+  bool do_NULL_check = offset() != 0 || is_static();
+  __ lw(Rtmp1, from(), Interpreter::local_offset_in_bytes(offset()));
+
+#if 0
+  Register Rtmp3 = Rscratch3;
+  if (TaggedStackInterpreter) {
+    // check we have the obj and not the tag
+    Label ok;
+    __ li(Rtmp3, frame::TagReference);
+    __ bne(Rtmp1, Rtmp3, ok);
+    __ delayed()->nop();
+    __ stop("Native object passed tag by mistake");
+    __ bind(ok);
+  }
+#endif
+
+  Label L;
+  __ bne(Rtmp1, ZERO, L);
+  __ addu_long(Rtmp1, from(), Interpreter::local_offset_in_bytes(offset()));
+  __ move(Rtmp1, ZERO);
+  __ bind(L);
+
+  //FIXME sw should be removed later
+  __ sw(Rtmp1, to(), jni_offset() * wordSize);
+  __ store_ptr_argument(Rtmp1, jni_arg);
+}
+
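+// Note on the alignment below (assumed O32 calling convention): 64-bit
+// arguments must start in an even argument slot (A0/A1 or A2/A3, or an even
+// stack slot), so jni_offset() is rounded up to an even value before the two
+// 32-bit halves are moved.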
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
+  if ((jni_offset()) & 1) {
+    _jni_offset++;
+  }
+
+  Argument jni_arg(jni_offset());
+  Argument jni_arg1(jni_offset() + 1);
+
+  if (jni_arg.is_Register() && jni_arg1.is_Register()) {
+    __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset()));
+    __ move(jni_arg1.as_Register(), temp());
+    __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1));
+    __ move(jni_arg.as_Register(), temp());
+  } // else
+  {
+    __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset()));
+    __ sw(temp(), jni_arg1.as_caller_address().base(), jni_arg1.as_caller_address().disp());
+    __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1));
+    __ sw(temp(), jni_arg.as_caller_address().base(), jni_arg.as_caller_address().disp());
+  }
+
+//  __ store_long_argument(Address(from(), Interpreter::local_offset_in_bytes(offset())), jni_arg);
+}
+
+
+inline Register InterpreterRuntime::SignatureHandlerGenerator::from()       { return LVP; }
+inline Register InterpreterRuntime::SignatureHandlerGenerator::to()         { return SP; }
+inline Register InterpreterRuntime::SignatureHandlerGenerator::temp()       { return T3; }
+
+// Implementation of SignatureHandlerLibrary
+
+void SignatureHandlerLibrary::pd_set_handler(address handler) {}
+
+
+class SlowSignatureHandler
+  : public NativeSignatureIterator {
+ private:
+  address   _from;
+  intptr_t* _to;
+  bool      _align;
+
+#ifdef ASSERT
+  void verify_tag(frame::Tag t) {
+    assert(!TaggedStackInterpreter ||
+           *(intptr_t*)(_from+Interpreter::local_tag_offset_in_bytes(0)) == t, "wrong tag");
+  }
+#endif // ASSERT
+
+  virtual void pass_int() {
+    *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
+    debug_only(verify_tag(frame::TagValue));
+    _from -= Interpreter::stackElementSize();
+    _align = !_align;
+  }
+
+  virtual void pass_long() {
+    if (_align) {
+      _align = false;
+      _to++;
+    }
+
+    _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
+    _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0));
+    debug_only(verify_tag(frame::TagValue));
+    _to   += 2;
+    _from -= 2*Interpreter::stackElementSize();
+  }
+
+  virtual void pass_object() {
+    // pass address of from
+    intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
+    *_to++ = (*(intptr_t*)from_addr == 0) ? NULL : from_addr;
+    debug_only(verify_tag(frame::TagReference));
+    _from -= Interpreter::stackElementSize();
+    _align = !_align;
+  }
+
+ public:
+  //SlowSignatureHandler(methodHandle method, jint* from, jint* to) : NativeSignatureIterator(method) {
+  SlowSignatureHandler(methodHandle method, address from, intptr_t* to) : NativeSignatureIterator(method) {
+    _from  = from;
+    _to    = to + (is_static() ? 2 : 1);
+    _align = !is_static();
+  }
+};
+
+
+IRT_ENTRY(address,
+          InterpreterRuntime::slow_signature_handler(JavaThread* thread,
+                                                     methodOopDesc* method,
+                                                     intptr_t* from,
+                                                     intptr_t* to))
+  methodHandle m(thread, (methodOop)method);
+  assert(m->is_native(), "sanity check");
+
+  // handle arguments
+  SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1));
+
+  // return result handler
+  return Interpreter::result_handler(m->result_type());
+IRT_END
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,52 @@
+/*
+ * Copyright 1998-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// native method calls
+
+class SignatureHandlerGenerator: public NativeSignatureIterator {
+ private:
+  MacroAssembler* _masm;
+ 
+  void move(int from_offset, int to_offset);
+
+  void box(int from_offset, int to_offset);
+  void pass_int();
+  void pass_long();
+  void pass_object();
+
+ public:
+  // Creation
+  SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
+    _masm = new MacroAssembler(buffer);
+  }
+
+  // Code generation
+  void generate(uint64_t fingerprint);
+
+  // Code generation support
+  static inline Register from();
+  static inline Register to();
+  static inline Register temp();
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/interpreter_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_interpreter_mips.cpp.incl"
+
+#define __ _masm->
+
+
+address AbstractInterpreterGenerator::generate_slow_signature_handler() {
+  address entry = __ pc();
+  //tty->print_cr("%p\t%p", entry, InterpreterRuntime::slow_signature_handler);
+  // T7: method
+  // T3: temporary
+  // S7: pointer to locals
+  // SP: begin of copied parameters area
+  //__ move(S3, RA);
+  __ move(T3, SP);
+  __ push(RA);
+  __ call_VM(NOREG, CAST_FROM_FN_PTR(address,
+             InterpreterRuntime::slow_signature_handler), T7, LVP, T3);
+  //__ move(RA, S3);
+  __ pop(RA);
+  __ jr(RA);
+  __ delayed()->nop();
+  return entry;
+}
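+// The runtime counterpart of this stub is InterpreterRuntime::slow_signature_handler
+// (see interpreterRT_mips.cpp later in this changeset); it copies the Java
+// arguments into the native argument area and returns the result handler,
+// which the caller picks up in V0 (assumed MIPS ABI return register).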
+
+
+//
+// Various method entries
+//
+
+address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+
+  // T7: methodOop
+  // V0: scratch
+  // esi: sender's sp; should we use T5? @jerome
+  if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
+  address entry_point = __ pc();
+
+  // These don't need a safepoint check because they aren't virtually
+  // callable. We won't enter these intrinsics from compiled code.
+  // If in the future we added an intrinsic which was virtually callable
+  // we'd have to worry about how to safepoint so that this code is used.
+
+
+  // mathematical functions inlined by compiler
+  // (interpreter must provide identical implementation
+  // in order to avoid monotonicity bugs when switching
+  // from interpreter to compiler in the middle of some
+  // computation)
+  //
+  // stack: [ lo(arg) ] <-- sp
+  //        [ hi(arg) ]
+/*
+  if (Universe::is_jdk12x_version()) {
+    // Note: For JDK 1.2 StrictMath doesn't exist and Math.sin/cos/sqrt are
+    //       native methods. Interpreter::method_kind(...) does a check for
+    //       native methods first before checking for intrinsic methods and
+    //       thus will never select this entry point. Make sure it is not
+    //       called accidentally since the SharedRuntime entry points will
+    //       not work for JDK 1.2.
+    __ should_not_reach_here();
+  } else 
+ */ 
+  {
+    // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are
+    //       java methods.  Interpreter::method_kind(...) will select
+    //       this entry point for the corresponding methods in JDK 1.3.
+    //FIXME, @jerome
+    if (TaggedStackInterpreter) {
+      __ lw(AT, SP, 3 * wordSize);
+      __ push(AT);    // push hi; note SP -= wordSize
+      __ lw(AT, SP, 2 * wordSize);
+      __ push(AT);    // push lo
+      __ lwc1(F12, SP, 2 * wordSize);
+      __ lwc1(F13, SP, 3 * wordSize);
+      __ sw(RA, SP, (1) * wordSize);
+      __ sw(FP, SP, (0) * wordSize);
+      __ addi(SP, SP, 2 * wordSize);
+      __ move(FP, SP);
+    } else {
+      __ lwc1(F12, SP, 0 * wordSize);
+      __ lwc1(F13, SP, 1 * wordSize);
+      __ sw(RA, SP, (-1) * wordSize);
+      __ sw(FP, SP, (-2) * wordSize);
+      __ move(FP, SP);
+      __ addi(SP, SP, (-2) * wordSize);
+    }
+    // [ fp     ] <-- sp
+    // [ ra     ]
+    // [ lo     ] <-- fp
+    // [ hi     ]
+/*		
+    switch (kind) {
+      case Interpreter::java_lang_math_sin :
+	__ sincos(true, true);
+	break;
+      case Interpreter::java_lang_math_cos :
+	__ sincos(false, true);
+	break;
+      case Interpreter::java_lang_math_sqrt: 
+	__ sqrt_d(F0, F12);
+	break;
+      default                              : 
+	ShouldNotReachHere();
+    }
+*/
+    //FIXME: need to consider this
+    switch (kind) {
+      case Interpreter::java_lang_math_sin :
+        __ trigfunc('s');
+        break;
+      case Interpreter::java_lang_math_cos :
+        __ trigfunc('c');
+        break;
+      case Interpreter::java_lang_math_tan :
+        __ trigfunc('t');
+        break;
+      case Interpreter::java_lang_math_sqrt:
+        //	__ fsqrt();
+        __ sqrt_d(F0, F12);
+        break;
+      case Interpreter::java_lang_math_abs:
+        //	__ fabs();
+        __ abs_d(F0, F12);
+        break;
+      case Interpreter::java_lang_math_log:
+        //	__ flog();
+        // Store to stack to convert 80bit precision back to 64bits
+        //	__ push_fTOS();
+        //	__ pop_fTOS();
+        break;
+      case Interpreter::java_lang_math_log10:
+        //	__ flog10();
+        // Store to stack to convert 80bit precision back to 64bits
+        //	__ push_fTOS();
+        //	__ pop_fTOS();
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+    // must maintain return value in F0:F1
+    __ lw(RA, FP, (-1) * wordSize);
+    //FIXME
+    __ move(SP, T5);
+    // __ move(SP, T0);
+    __ lw(FP, FP, (-2) * wordSize);
+    __ jr(RA);
+    __ delayed()->nop();
+  }
+  return entry_point;    
+}
+
+
+// Abstract method entry
+// Attempt to execute abstract method. Throw exception
+address InterpreterGenerator::generate_abstract_entry(void) {
+
+  // T7: methodOop
+  // V0: receiver (unused)
+  // esi: previous interpreter state (C++ interpreter) must preserve
+  // T5: sender's sp
+  address entry_point = __ pc();
+
+  // abstract method entry
+  // throw exception
+  // adjust stack to what a normal return would do
+
+  //__ movl(esp, esi);
+  __ move(SP, T5); //FIXME: why does jvm6 add this? @jerome
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  // the call_VM checks for exceptions, so we should never return here.
+  __ should_not_reach_here();
+
+  return entry_point;
+}
+
+
+// Empty method, generate a very fast return.
+
+address InterpreterGenerator::generate_empty_entry(void) {
+
+  // T7: methodOop
+  // V0: receiver (unused)
+  // esi: previous interpreter state (C++ interpreter) must preserve
+  // T5: sender's sp; must set sp to this value on return. On MIPS we now use T0; is that right?
+  if (!UseFastEmptyMethods) return NULL;
+
+  address entry_point = __ pc();
+
+  Label slow_path;
+  //  __ cmpl(Address((int)SafepointSynchronize::address_of_state(), relocInfo::none),
+  //  SafepointSynchronize::_not_synchronized);
+  // __ jcc(Assembler::notEqual, slow_path);
+  __ move(T6, (int)SafepointSynchronize::address_of_state());
+  __ lw(AT, T6, 0);
+  // __ addi(AT, AT, -(SafepointSynchronize::_not_synchronized));
+  __ move(T6, (SafepointSynchronize::_not_synchronized));
+  __ bne(AT, T6, slow_path);
+  __ delayed()->nop();
+
+  // do nothing for empty methods (do not even increment invocation counter)
+  // Code: _return
+  // return w/o popping parameters
+  //__ popl(eax);
+  //__ movl(esp, esi);
+  //__ jmp(eax);
+  __ move(SP, T5);
+  __ jr(RA);
+  __ delayed()->nop();
+
+  __ bind(slow_path);
+  //(void) generate_asm_interpreter_entry(false);
+  (void) generate_normal_entry(false);
+
+  return entry_point;
+}
+
+// This method tells the deoptimizer how big an interpreted frame must be:
+int AbstractInterpreter::size_activation(methodOop method,
+                                         int tempcount,
+                                         int popframe_extra_args,
+                                         int moncount,
+                                         int callee_param_count,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  return layout_activation(method,
+                           tempcount, popframe_extra_args, moncount,
+                           callee_param_count, callee_locals,
+                           (frame*) NULL, (frame*) NULL, is_top_frame);
+}
+
+void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
+
+  // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
+  // the days we had adapter frames. When we deoptimize a situation where a
+  // compiled caller calls a compiled caller will have registers it expects
+  // to survive the call to the callee. If we deoptimize the callee the only
+  // way we can restore these registers is to have the oldest interpreter
+  // frame that we create restore these values. That is what this routine
+  // will accomplish.
+
+  // At the moment we have modified c2 to not have any callee save registers
+  // so this problem does not exist and this routine is just a place holder.
+
+  assert(f->is_interpreted_frame(), "must be interpreted");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,76 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+ public:
+
+  // Sentinel placed in the code for interpreter returns so
+  // that i2c adapters and osr code can recognize an interpreter
+  // return address and convert the return to a specialized
+  // block of code to handle compiled return values and cleaning
+  // the fpu stack.
+  static const int return_sentinel;
+
+  static Address::ScaleFactor stackElementScale() {
+    return TaggedStackInterpreter ? Address::times_8 : Address::times_4;
+  }
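+  // Example (illustrative): with TaggedStackInterpreter off each stack element
+  // is a single 4-byte word, so expr_offset_in_bytes(2) below returns 8; with
+  // tagging on, every element carries an extra tag word, which gives the
+  // times_8 scale above.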
+
+  // Offset from rsp (which points to the last stack element)
+  static int expr_offset_in_bytes(int i) { return stackElementSize() * i; }
+  static int expr_tag_offset_in_bytes(int i) {
+    assert(TaggedStackInterpreter, "should not call this");
+    return expr_offset_in_bytes(i) + wordSize;
+  }
+  // Size of interpreter code.  Increase if too small.  Interpreter will
+  // fail with a guarantee ("not enough space for interpreter generation")
+  // if too small.
+  // Run with +PrintInterpreterSize to get the VM to print out the size.
+  // Max size with JVMTI and TaggedStackInterpreter
+  const static int InterpreterCodeSize = 168 * 1024;
+#if 0
+  // Support for Tagged Stacks
+
+  // Stack index relative to tos (which points at value)
+  static int expr_index_at(int i)     {
+    return stackElementWords() * i;
+  }
+
+  static int expr_tag_index_at(int i) {
+    assert(TaggedStackInterpreter, "should not call this");
+    // tag is one word above java stack element
+    return stackElementWords() * i + 1;
+  }
+
+  // Already negated by c++ interpreter
+  static int local_index_at(int i)     {
+    assert(i<=0, "local direction already negated");
+    return stackElementWords() * i + (value_offset_in_bytes()/wordSize);
+  }
+
+  static int local_tag_index_at(int i) {
+    assert(i<=0, "local direction already negated");
+    assert(TaggedStackInterpreter, "should not call this");
+    return stackElementWords() * i + (tag_offset_in_bytes()/wordSize);
+  }
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2002-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+private:
+
+  // FP value associated with _last_Java_sp:
+  intptr_t* volatile        _last_Java_fp;           // pointer is volatile not what it points to
+
+public:
+  // Each arch must define reset, save, restore
+  // These are used by objects that only care about:
+  //  1 - initializing a new state (thread creation, javaCalls)
+  //  2 - saving a current state (javaCalls)
+  //  3 - restoring an old state (javaCalls)
+
+  void clear(void) {
+    // clearing _last_Java_sp must be first
+    _last_Java_sp = NULL;
+    // fence?
+    _last_Java_fp = NULL;
+    _last_Java_pc = NULL;
+  }
+
+  void copy(JavaFrameAnchor* src) {
+    // In order to make sure the transition state is valid for "this"
+    // We must clear _last_Java_sp before copying the rest of the new data
+    //
+    // Hack Alert: Temporary bugfix for 4717480/4721647
+    // To act like previous version (pd_cache_state) don't NULL _last_Java_sp
+    // unless the value is changing
+    //
+    if (_last_Java_sp != src->_last_Java_sp)
+      _last_Java_sp = NULL;
+
+    _last_Java_fp = src->_last_Java_fp;
+    _last_Java_pc = src->_last_Java_pc;
+    // Must be last so profiler will always see valid frame if has_last_frame() is true
+    _last_Java_sp = src->_last_Java_sp;
+  }
+
+  // Always walkable
+  bool walkable(void) { return true; }
+  // Never any thing to do since we are always walkable and can find address of return addresses
+  void make_walkable(JavaThread* thread) { }
+
+  intptr_t* last_Java_sp(void) const             { return _last_Java_sp; }
+
+private:
+
+  static ByteSize last_Java_fp_offset()          { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); }
+
+public:
+
+  void set_last_Java_sp(intptr_t* sp)            { _last_Java_sp = sp; }
+
+  intptr_t*   last_Java_fp(void)                     { return _last_Java_fp; }
+  // Assert (last_Java_sp == NULL || fp == NULL)
+  void set_last_Java_fp(intptr_t* fp)                { _last_Java_fp = fp; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/jniFastGetField_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,263 @@
+/*
+ * Copyright 2004-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_jniFastGetField_mips.cpp.incl"
+
+#define __ masm->
+
+#define BUFFER_SIZE 30*wordSize
+
+// Instead of issuing lfence for LoadLoad barrier, we create data dependency
+// between loads, which is more efficient than lfence.
+
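+// All fast-path generators below follow the same protocol (sketch in C-like
+// pseudocode; the register assignments follow the emitted MIPS code):
+//
+//   jint cnt = *safepoint_counter_addr;   // T1: first read of the counter
+//   if (cnt & 1) goto slow;               // odd => safepoint in progress
+//   value = *(*obj_handle + offset);      // speculative field load
+//   if (*safepoint_counter_addr != cnt)   // counter moved => a GC may have
+//     goto slow;                          //   run; fall back to the slow
+//   return value;                         //   JNI accessor
+//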
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
+  const char *name;
+  switch (type) {
+    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
+    case T_BYTE:    name = "jni_fast_GetByteField";    break;
+    case T_CHAR:    name = "jni_fast_GetCharField";    break;
+    case T_SHORT:   name = "jni_fast_GetShortField";   break;
+    case T_INT:     name = "jni_fast_GetIntField";     break;
+    case T_LONG:    name = "jni_fast_GetLongField";    break;
+    default:        ShouldNotReachHere();
+  }
+  ResourceMark rm;
+  BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE);
+  address fast_entry = b->instructions_begin();
+  CodeBuffer cbuf(fast_entry, b->instructions_size());
+  MacroAssembler* masm = new MacroAssembler(&cbuf);
+
+  Label slow;
+
+  //  return pc        RA
+  //  jni env          A0
+  //  obj              A1
+  //  jfieldID         A2
+
+  address counter_addr = SafepointSynchronize::safepoint_counter_addr();
+  __ lui(AT, Assembler::split_high((int)counter_addr));
+  __ lw(T1, AT, Assembler::split_low((int)counter_addr));
+
+  __ andi(AT, T1, 1);
+  __ bne(AT, ZERO, slow);
+  __ delayed()->nop();
+
+  __ lw(A1, A1, 0);              // unbox, *obj
+  __ shr(A2, 2);                 // offset
+  __ add(A1, A1, A2);
+
+  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
+  speculative_load_pclist[count] = __ pc();
+  switch (type) {
+    case T_BOOLEAN: __ lbu (V0, A1, 0); break;
+    case T_BYTE:    __ lb  (V0, A1, 0); break;
+    case T_CHAR:    __ lhu (V0, A1, 0); break;
+    case T_SHORT:   __ lh  (V0, A1, 0); break;
+    case T_INT:     __ lw  (V0, A1, 0); break;
+    case T_LONG:    Unimplemented(); break;
+    default:        ShouldNotReachHere();
+  }
+
+  __ lui(AT, Assembler::split_high((int)counter_addr));
+  __ lw(AT, AT, Assembler::split_low((int)counter_addr));
+  __ bne(T1, AT, slow);
+  __ delayed()->nop();
+
+  __ jr(RA);
+  __ delayed()->nop();
+
+  slowcase_entry_pclist[count++] = __ pc();
+  __ bind (slow);
+  address slow_case_addr;
+  switch (type) {
+    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
+    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
+    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
+    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
+    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
+    case T_LONG:    slow_case_addr = jni_GetLongField_addr();    break;
+  }
+  __ jmp(slow_case_addr);
+  __ delayed()->nop();
+
+  __ flush ();
+
+  return fast_entry;
+}
+
+address JNI_FastGetField::generate_fast_get_boolean_field() {
+  return generate_fast_get_int_field0(T_BOOLEAN);
+}
+
+address JNI_FastGetField::generate_fast_get_byte_field() {
+  return generate_fast_get_int_field0(T_BYTE);
+}
+
+address JNI_FastGetField::generate_fast_get_char_field() {
+  return generate_fast_get_int_field0(T_CHAR);
+}
+
+address JNI_FastGetField::generate_fast_get_short_field() {
+  return generate_fast_get_int_field0(T_SHORT);
+}
+
+address JNI_FastGetField::generate_fast_get_int_field() {
+  return generate_fast_get_int_field0(T_INT);
+}
+
+address JNI_FastGetField::generate_fast_get_long_field() {
+  //return generate_fast_get_int_field0(T_LONG);
+  const char *name = "jni_fast_GetLongField";
+  ResourceMark rm;
+  BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE);
+  address fast_entry = b->instructions_begin();
+  // CodeBuffer* cbuf = new CodeBuffer(fast_entry, b->instructions_size());
+  CodeBuffer cbuf(fast_entry, b->instructions_size());
+  MacroAssembler* masm = new MacroAssembler(&cbuf);
+
+  Label slow;
+
+  //  return pc        RA
+  //  jni env          A0
+  //  obj              A1
+  //  jfieldID         A2
+
+  address counter_addr = SafepointSynchronize::safepoint_counter_addr();
+  //__ move(AT, (int)counter_addr);
+  //__ lw(T1, AT, 0);
+  __ lui(AT, Assembler::split_high((int)counter_addr));
+  __ lw(T1, AT, Assembler::split_low((int)counter_addr));
+  __ andi(AT, T1, 1);
+  __ bne(AT, ZERO, slow);
+  __ delayed()->nop();
+
+  __ lw (A1, A1, 0);              // unbox, *obj
+  __ shr(A2, 2);                  // offset
+  __ add(A1, A1, A2);
+
+  assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small");
+  speculative_load_pclist[count++] = __ pc();
+  __ lw(V0, A1, 0);
+  speculative_load_pclist[count] = __ pc();
+  __ lw(V1, A1, 4);
+
+  __ lui(AT, Assembler::split_high((int)counter_addr));
+  __ lw(AT, AT, Assembler::split_low((int)counter_addr));
+  __ bne(T1, AT, slow);
+  __ delayed()->nop();
+
+  __ jr(RA);
+  __ delayed()->nop();
+
+  slowcase_entry_pclist[count-1] = __ pc();
+  slowcase_entry_pclist[count++] = __ pc();
+  __ bind (slow);
+  address slow_case_addr = jni_GetLongField_addr();
+  // tail call
+  __ jmp(slow_case_addr);
+  __ delayed()->nop();
+
+  __ flush();
+  return fast_entry;
+}
+
+address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
+  const char *name;
+  switch (type) {
+    case T_FLOAT:     name = "jni_fast_GetFloatField";     break;
+    case T_DOUBLE:    name = "jni_fast_GetDoubleField";    break;
+    default:          ShouldNotReachHere();
+  }
+  ResourceMark rm;
+  BufferBlob* b = BufferBlob::create(name, BUFFER_SIZE);
+  address fast_entry = b->instructions_begin();
+  CodeBuffer cbuf(fast_entry, b->instructions_size());
+  MacroAssembler* masm = new MacroAssembler(&cbuf);
+
+  Label slow;
+
+  //  return pc        RA
+  //  jni env          A0
+  //  obj              A1
+  //  jfieldID         A2
+
+  address counter_addr = SafepointSynchronize::safepoint_counter_addr();
+  //__ move(AT, (int)counter_addr);
+  //__ lw(T1, AT, 0);
+  __ lui(AT, Assembler::split_high((int)counter_addr));
+  __ lw(T1, AT, Assembler::split_low((int)counter_addr));
+  __ andi(AT, T1, 1);
+  __ bne(AT, ZERO, slow);
+  __ delayed()->nop();
+
+  __ lw(A1, A1, 0);              // unbox, *obj
+  __ shr(A2, 2);                 // offset
+  __ add(A1, A1, A2);
+
+  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
+  speculative_load_pclist[count] = __ pc();
+  switch (type) {
+    case T_FLOAT:
+      __ lwc1(F0, A1, 0);
+      break;
+    case T_DOUBLE:
+      __ lwc1(F0, A1, 0);
+      __ lwc1(F1, A1, 4);
+      break;
+    default:       ShouldNotReachHere();
+  }
+
+  __ lui(AT, Assembler::split_high((int)counter_addr));
+  __ lw(AT, AT, Assembler::split_low((int)counter_addr));
+  __ bne(T1, AT, slow);
+  __ delayed()->nop();
+
+  __ jr(RA);
+  __ delayed()->nop();
+
+  slowcase_entry_pclist[count++] = __ pc();
+  __ bind (slow);
+  address slow_case_addr;
+  switch (type) {
+    case T_FLOAT:  slow_case_addr = jni_GetFloatField_addr();  break;
+    case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
+    default:       ShouldNotReachHere();
+  }
+  __ jmp(slow_case_addr);
+  __ delayed()->nop();
+
+  __ flush();
+  return fast_entry;
+}
+
+address JNI_FastGetField::generate_fast_get_float_field() {
+  return generate_fast_get_float_field0(T_FLOAT);
+}
+
+address JNI_FastGetField::generate_fast_get_double_field() {
+  return generate_fast_get_float_field0(T_DOUBLE);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,107 @@
+/*
+ * Copyright 1998-2003 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This file holds platform-dependent routines used to write primitive jni
+// types to the array of arguments passed into JavaCalls::call
+
+class JNITypes : AllStatic {
+  // These functions write a java primitive type (in native format)
+  // to a java stack slot array to be passed as an argument to JavaCalls:calls.
+  // I.e., they are functionally 'push' operations if they have a 'pos'
+  // formal parameter.  Note that jlong's and jdouble's are written
+  // _in reverse_ of the order in which they appear in the interpreter
+  // stack.  This is because call stubs (see stubGenerator_sparc.cpp)
+  // reverse the argument list constructed by JavaCallArguments (see
+  // javaCalls.hpp).
+
+private:
+
+#ifndef AMD64
+  // 32bit Helper routines.
+  static inline void    put_int2r(jint *from, intptr_t *to)           { *(jint *)(to++) = from[1];
+                                                                        *(jint *)(to  ) = from[0]; }
+  static inline void    put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; }
+#endif // AMD64
+
+public:
+  // Ints are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_int(jint  from, intptr_t *to)           { *(jint *)(to +   0  ) =  from; }
+  static inline void    put_int(jint  from, intptr_t *to, int& pos) { *(jint *)(to + pos++) =  from; }
+  static inline void    put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
+
+#ifdef AMD64
+  // Longs are stored in native format in one JavaCallArgument slot at
+  // *(to+1).
+  static inline void put_long(jlong  from, intptr_t *to) {
+    *(jlong*) (to + 1) = from;
+  }
+
+  static inline void put_long(jlong  from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+#else
+  // Longs are stored in big-endian word format in two JavaCallArgument slots at *to.
+  // The high half is in *to and the low half in *(to+1).
+  static inline void    put_long(jlong  from, intptr_t *to)           { put_int2r((jint *)&from, to); }
+  static inline void    put_long(jlong  from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); }
+  static inline void    put_long(jlong *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); }
+#endif // AMD64
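+
+  // Illustrative sketch, assuming a 32-bit little-endian target: a call such
+  // as put_long(0x1122334455667788LL, to, pos) with pos == 0 goes through
+  // put_int2r, which stores the two words in reverse order:
+  //
+  //   to[0] <- 0x11223344   (high word, from[1])
+  //   to[1] <- 0x55667788   (low word,  from[0])
+  //   pos == 2 afterwards
+  //
+  // so the call stub can reverse the argument list and still hand the callee
+  // a correctly ordered 64-bit value.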
+
+  // Oops are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_obj(oop  from, intptr_t *to)           { *(oop *)(to +   0  ) =  from; }
+  static inline void    put_obj(oop  from, intptr_t *to, int& pos) { *(oop *)(to + pos++) =  from; }
+  static inline void    put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
+
+  // Floats are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_float(jfloat  from, intptr_t *to)           { *(jfloat *)(to +   0  ) =  from;  }
+  static inline void    put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
+  static inline void    put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
+
+#undef _JNI_SLOT_OFFSET
+#define _JNI_SLOT_OFFSET 0
+  // Doubles are stored in big-endian word format in two JavaCallArgument slots at *to.
+  // The high half is in *to and the low half in *(to+1).
+  static inline void    put_double(jdouble  from, intptr_t *to)           { put_int2r((jint *)&from, to); }
+  static inline void    put_double(jdouble  from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); }
+  static inline void    put_double(jdouble *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); }
+
+
+  // The get_xxx routines, on the other hand, actually _do_ fetch
+  // java primitive types from the interpreter stack.
+  // No need to worry about alignment on Intel.
+  static inline jint    get_int   (intptr_t *from) { return *(jint *)   from; }
+  static inline jlong   get_long  (intptr_t *from) { return *(jlong *)  (from + _JNI_SLOT_OFFSET); }
+  static inline oop     get_obj   (intptr_t *from) { return *(oop *)    from; }
+  static inline jfloat  get_float (intptr_t *from) { return *(jfloat *) from; }
+  static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); }
+#undef _JNI_SLOT_OFFSET
+};
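+
+// Usage sketch (hypothetical, for illustration only): packing an int followed
+// by a long into a JavaCallArguments-style slot array on a 32-bit build.
+//
+//   intptr_t args[3];
+//   int pos = 0;
+//   JNITypes::put_int(42, args, pos);         // args[0] = 42,  pos == 1
+//   JNITypes::put_long((jlong)7, args, pos);  // args[1], args[2] hold the
+//                                             // reversed words, pos == 3
+//
+// The get_xxx routines above, by contrast, read from the interpreter stack
+// layout rather than from this array.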
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/jni_mips.h	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright 1997-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+#define JNIEXPORT
+#define JNIIMPORT
+#define JNICALL
+
+typedef int jint;
+
+#ifdef _LP64
+  typedef long jlong;
+#else
+  typedef long long jlong;
+#endif
+
+typedef signed char jbyte;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/mips.ad	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,8802 @@
+//
+// Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+// Copyright 2010 Lemote, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+//
+
+// GodSon2 Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+
+
+// format:
+// reg_def name (call convention, c-call convention, ideal type, encoding);
+// 		call convention : 
+//			NS  = No-Save
+//			SOC = Save-On-Call
+//			SOE = Save-On-Entry
+//			AS  = Always-Save
+//		ideal type :
+//			see opto/opcodes.hpp for more info
+// reg_class name (reg, ...);
+// alloc_class name (reg, ...); 
+register %{
+	//Integer Registers
+	reg_def ZERO	(NS, 	NS, 	Op_RegI, 0, R0->as_VMReg());
+	reg_def AT		(NS, 	NS, 	Op_RegI, 1, AT->as_VMReg());
+	reg_def V0		(SOC, SOC,	Op_RegI, 2, V0->as_VMReg());
+	reg_def V1		(SOC, SOC,	Op_RegI, 3, V1->as_VMReg());
+	reg_def A0		(SOC, SOC,	Op_RegI, 4, A0->as_VMReg());
+	reg_def A1		(SOC, SOC,	Op_RegI, 5, A1->as_VMReg());
+	reg_def A2		(SOC, SOC,	Op_RegI, 6, A2->as_VMReg());
+	reg_def A3		(SOC, SOC,	Op_RegI, 7, A3->as_VMReg());
+	reg_def T0		(SOC, SOC,	Op_RegI, 8, T0->as_VMReg());
+	reg_def T1		(SOC, SOC,	Op_RegI, 9, T1->as_VMReg());
+	reg_def T2		(SOC, SOC,	Op_RegI, 10, T2->as_VMReg());
+	reg_def T3		(SOC, SOC,	Op_RegI, 11, T3->as_VMReg());
+	reg_def T4		(SOC, SOC,	Op_RegI, 12, T4->as_VMReg());
+	reg_def T5		(SOC, SOC,	Op_RegI, 13, T5->as_VMReg());
+	reg_def T6		(SOC, SOC,	Op_RegI, 14, T6->as_VMReg());
+	reg_def T7		(SOC, SOC,	Op_RegI, 15, T7->as_VMReg());
+	reg_def S0		(SOE, SOE,	Op_RegI, 16, S0->as_VMReg());
+	reg_def S1		(SOE, SOE,	Op_RegI, 17, S1->as_VMReg());
+	reg_def S2		(SOE, SOE,	Op_RegI, 18, S2->as_VMReg());
+	reg_def S3		(SOE, SOE,	Op_RegI, 19, S3->as_VMReg());
+	reg_def S4		(SOE, SOE,	Op_RegI, 20, S4->as_VMReg());
+	reg_def S5		(SOE, SOE,	Op_RegI, 21, S5->as_VMReg());
+	reg_def S6		(SOE, SOE,	Op_RegI, 22, S6->as_VMReg());
+	reg_def S7		(SOE, SOE,	Op_RegI, 23, S7->as_VMReg());
+	reg_def T8		(SOC, SOC,	Op_RegI, 24, T8->as_VMReg());
+	reg_def T9		(SOC, SOC,	Op_RegI, 25, T9->as_VMReg());
+	reg_def K0		(NS,	NS,		Op_RegI, 26, K0->as_VMReg());
+	reg_def K1		(NS,	NS,		Op_RegI, 27, K1->as_VMReg());
+	reg_def GP		(NS,	NS,		Op_RegI, 28, GP->as_VMReg());
+	reg_def SP		(NS,	NS,		Op_RegI, 29, SP->as_VMReg());
+	reg_def FP		(NS,	NS,		Op_RegI, 30, FP->as_VMReg());
+	reg_def RA		(NS,	SOE,	Op_RegI, 31, RA->as_VMReg());
+
+	// Float registers. 
+	reg_def F0		(SOC,	SOC,	Op_RegF, 0, F0->as_VMReg());
+	reg_def F1		(SOC,	SOC,	Op_RegF, 1, F1->as_VMReg());
+	reg_def F2		(SOC,	SOC,	Op_RegF, 2, F2->as_VMReg());
+	reg_def F3		(SOC,	SOC,	Op_RegF, 3, F3->as_VMReg());
+	reg_def F4		(SOC,	SOC,	Op_RegF, 4, F4->as_VMReg());
+	reg_def F5		(SOC,	SOC,	Op_RegF, 5, F5->as_VMReg());
+	reg_def F6		(SOC,	SOC,	Op_RegF, 6, F6->as_VMReg());
+	reg_def F7		(SOC,	SOC,	Op_RegF, 7, F7->as_VMReg());
+	reg_def F8		(SOC,	SOC,	Op_RegF, 8, F8->as_VMReg());
+	reg_def F9		(SOC,	SOC,	Op_RegF, 9, F9->as_VMReg());
+	reg_def F10		(SOC,	SOC,	Op_RegF, 10, F10->as_VMReg());
+	reg_def F11		(SOC,	SOC,	Op_RegF, 11, F11->as_VMReg());
+	reg_def F12		(SOC,	SOC,	Op_RegF, 12, F12->as_VMReg());
+	reg_def F13		(SOC,	SOC,	Op_RegF, 13, F13->as_VMReg());
+	reg_def F14		(SOC,	SOC,	Op_RegF, 14, F14->as_VMReg());
+	reg_def F15		(SOC,	SOC,	Op_RegF, 15, F15->as_VMReg());
+	reg_def F16		(SOC,	SOC,	Op_RegF, 16, F16->as_VMReg());
+	reg_def F17		(SOC,	SOC,	Op_RegF, 17, F17->as_VMReg());
+	reg_def F18		(SOC,	SOC,	Op_RegF, 18, F18->as_VMReg());
+	reg_def F19		(SOC,	SOC,	Op_RegF, 19, F19->as_VMReg());
+	reg_def F20		(SOC,	SOC,	Op_RegF, 20, F20->as_VMReg());
+	reg_def F21		(SOC,	SOC,	Op_RegF, 21, F21->as_VMReg());
+	reg_def F22		(SOC,	SOC,	Op_RegF, 22, F22->as_VMReg());
+	reg_def F23		(SOC,	SOC,	Op_RegF, 23, F23->as_VMReg());
+	reg_def F24		(SOC,	SOC,	Op_RegF, 24, F24->as_VMReg());
+	reg_def F25		(SOC,	SOC,	Op_RegF, 25, F25->as_VMReg());
+	reg_def F26		(SOC,	SOC,	Op_RegF, 26, F26->as_VMReg());
+	reg_def F27		(SOC,	SOC,	Op_RegF, 27, F27->as_VMReg());
+	reg_def F28		(SOC,	SOC,	Op_RegF, 28, F28->as_VMReg());
+	reg_def F29		(SOC,	SOC,	Op_RegF, 29, F29->as_VMReg());
+	reg_def F30		(SOC,	SOC,	Op_RegF, 30, F30->as_VMReg());
+	reg_def F31		(SOC,	SOC,	Op_RegF, 31, F31->as_VMReg());
+
+	alloc_class chunk0(	T0, T1, T2, T3, T4, T5, T6, T7,
+			S0, S1, S2, S3, S4, S5, S6, S7);
+
+	alloc_class chunk1(	AT, T8, T9, SP, FP, GP, ZERO, RA, K0, K1);
+
+	// Class for all registers
+	reg_class any_reg(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, 
+			S0, S1, S2, S3, S4, S5, S6, S7, V0, V1, A0, A1, A2, A3, AT,
+			SP, FP, RA, ZERO, GP, K0, K1);
+
+	// Class for general registers
+	reg_class e_reg(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, 
+			S0, S1, S2, S3, S4, S5, S6, S7, V0, V1, A0, A1, A2, A3, AT);
+
+	// Class of registers that can appear in an address with no offset.
+	// EBP and ESP require an extra instruction byte for zero offset.
+	// Used in fast-unlock
+	//reg_class p_reg(EDX, EDI, ESI, EBX);
+	reg_class p_reg(T0, T1, T2, T3, T4, T5, T6, T7, S0, S1, S2, S3, S4, S5, S6, S7);
+
+	reg_class long_reg(V0,V1, A0,A1, A2,A3);
+
+	// Class of integer register pairs that aligns with calling convention
+	reg_class ret_reg(V0,V1);
+	reg_class p0_reg(A0,A1);
+	reg_class p2_reg(A2,A3);
+
+	// Floating point registers.
+	reg_class flt_reg(	F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,
+											F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28,F29,F30,F31 );
+	reg_class dbl_reg( F0,F2,F4,F6,F8,F10,F12,F14,F16,F18,F20,F22,F24,F26,F28,F30 );
+
+	reg_class flt_arg0( F12 );
+	reg_class dbl_arg0( F12 );
+	reg_class dbl_arg1( F14 );
+%}
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//        int_def  <name>         ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define  <name>   (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+definitions %{
+	// The default cost (of an ALU instruction).
+	int_def DEFAULT_COST      (    100,     100);
+	int_def HUGE_COST         (1000000, 1000000);
+
+	// Memory refs are twice as expensive as run-of-the-mill.
+	int_def MEMORY_REF_COST   (    200, DEFAULT_COST * 2);
+
+	// Branches are even more expensive.
+	int_def BRANCH_COST       (    300, DEFAULT_COST * 3);
+	// we use the jr instruction to construct calls, so they are more expensive
+	// by yjl 2/28/2006
+	int_def CALL_COST         (    500, DEFAULT_COST * 5);
+%}
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source %{
+
+#define __ _masm.
+
+	// ****************************************************************************
+	// temporary fix to generate new relocation info
+	#define   RELOC_IMM32    0
+	#define   RELOC_DISP32   1
+	#define   RELOC_CALL32   2
+	// ****************************************************************************
+
+	// How to find the high register of a Long pair, given the low register
+	#define   HIGH_FROM_LOW(x) ((x)+1)
+
+	void emit_orri(CodeBuffer &cbuf, const MachNode* n, int opcode, int rs_enc, int rt_enc, int imm) {
+		int insn = (opcode<<26) | (rs_enc<<21) | (rt_enc<<16) | bitfield(imm, 0, 16);
+		*((int*)cbuf.code_end()) = insn;
+		cbuf.set_code_end(cbuf.code_end() + sizeof(insn));
+	}
+
+	void emit_rrro(CodeBuffer &cbuf, const MachNode* n, int rs_enc, int rt_enc, int rd_enc, int opcode) {
+		int insn = (rs_enc<<21) | (rt_enc<<16) | (rd_enc<<11) | opcode;
+		*((int*)cbuf.code_end()) = insn;
+		cbuf.set_code_end(cbuf.code_end() + sizeof(insn));
+	}
+	
+	void emit_rrso(CodeBuffer &cbuf, const MachNode* n, int rt_enc, int rd_enc, int sa, int opcode) {
+		int insn = (rt_enc<<16) | (rd_enc<<11) | (sa<<6) | opcode;
+		*((int*)cbuf.code_end()) = insn;
+		cbuf.set_code_end(cbuf.code_end() + sizeof(insn));
+	}
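+
+	// Worked example (illustrative only): with the I-type layout used by
+	// emit_orri above -- opcode[31:26] rs[25:21] rt[20:16] imm[15:0] -- a call
+	// like
+	//
+	//   emit_orri(cbuf, n, 0x0d /* ori */, 4 /* rs = A0 */, 8 /* rt = T0 */, 0x1234);
+	//
+	// appends the word 0x34881234, i.e. "ori T0, A0, 0x1234", to the code
+	// buffer. emit_rrro and emit_rrso build R-type words the same way, with
+	// the funct field in the low 6 bits.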
+	
+	// I don't know the real differences among the following series; just keep them the same for now.
+	// by yjl 1/6/2006
+
+	// !!!!! Special hack to get all types of calls to specify the byte offset
+	//       from the start of the call to the point where the return address
+	//       will point.
+	int MachCallStaticJavaNode::ret_addr_offset() {
+		return NativeCall::return_address_offset;
+	}
+
+	int MachCallDynamicJavaNode::ret_addr_offset() {
+		return NativeMovConstReg::instruction_size + NativeCall::return_address_offset;
+	}
+
+	int MachCallRuntimeNode::ret_addr_offset() {
+		return NativeCall::return_address_offset;
+	}
+
+	// change here, by yjl 2/28/2006
+	int MachCallCompiledJavaNode::ret_addr_offset() {
+		return NativeCall::return_address_offset;
+	}
+
+	// change here, by yjl 2/28/2006
+	int MachCallInterpreterNode::ret_addr_offset() {
+		// Offset from start of this code to where return address points
+		return NativeCall::return_address_offset;
+	}
+
+	// change here, by yjl 2/28/2006
+	int MachCallNativeNode::ret_addr_offset() {
+		return MachCallRuntimeNode::ret_addr_offset(); 
+	}
+
+	// Indicate if the safepoint node needs the polling page as an input.
+	// Since x86 does have absolute addressing, it doesn't.
+	// i dont know what should it be returned on godson, just keep it unchanged as x86. 
+	// by yjl 1/6/2006
+	// i think it's right now by yjl 2/28/2006
+	bool SafePointNode::needs_polling_address_input() {
+		return false;
+	}
+
+	//
+	// Compute padding required for nodes which need alignment
+	//
+
+	// The address of the call instruction needs to be 4-byte aligned to
+	// ensure that it does not span a cache line so that it can be patched.
+	
+	// What the hell does in_24_bit_fp_mode mean?
+	// I just ignore it for now.
+	// by yjl 1/6/2006
+	// It's only needed on x86. by yjl 2/28/2006
+	int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
+		return round_to(current_offset, alignment_required()) - current_offset;
+	}
+
+	// The address of the call instruction needs to be 4-byte aligned to
+	// ensure that it does not span a cache line so that it can be patched.
+	int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
+		return round_to(current_offset, alignment_required()) - current_offset;
+	}
+
+	// The address of the call instruction needs to be 4-byte aligned to
+	// ensure that it does not span a cache line so that it can be patched.
+	int CallInterpreterDirectNode::compute_padding(int current_offset) const {
+		return round_to(current_offset, alignment_required()) - current_offset;
+	}
+
+	void add_oop_Relocation(CodeBuffer &cbuf, jobject h) {
+		OopRecorder *oop_recorder = cbuf.oop_recorder();
+		assert(oop_recorder != NULL, "CodeBuffer must have OopRecorder");
+
+		// Create relocation information, record Oop
+		int oop_index = oop_recorder->find_index(h);
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+		assert(h == NULL || JNIHandles::resolve(h)->is_perm(), "cannot embed non-perm oops in code");
+
+		// add Relocation information to the CodeBuffer
+		cbuf.relocate(cbuf.mark(), rspec);
+	}
+	
+
+#ifndef PRODUCT
+	void MachBreakpointNode::format( PhaseRegAlloc * ) const {
+		tty->print("break");
+	}
+#endif
+
+	//=============================================================================
+#ifndef PRODUCT
+	void MachPrologNode::format( PhaseRegAlloc *ra_ ) const {
+		Compile* C = ra_->C;
+		
+		for (int i = 0; i < OptoPrologueNops; i++) {
+			tty->print_cr("nop"); tty->print("\t");
+		}
+
+		if( VerifyThread ) {
+			tty->print_cr("Verify_Thread"); tty->print("\t");
+		}
+			
+
+		int framesize = C->frame_slots() << LogBytesPerInt;
+		assert(framesize % (2*wordSize) == wordSize, "aligned frame size");
+
+		/*if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
+			tty->print("move\tAT, 0xBADB100D\t# Majik cookie for stack depth check\n\t");
+			tty->print("sw\t\tAT, SP, -4\n\t");
+		}*/
+
+		// Calls to C2R adapters often do not accept exceptional returns.
+		// We require that their callers must bang for them.  But be careful, because
+		// some VM calls (such as call site linkage) can use several kilobytes of
+		// stack.  But the stack safety zone should account for that.
+		// See bugs 4446381, 4468289, 4497237.
+		if (C->need_stack_bang(framesize)) {
+			tty->print_cr("# stack bang"); tty->print("\t"); 
+		}
+
+		if( C->start()->Opcode() == Op_StartI2C) {
+			///tty->print_cr( "MOV    EBX,ESP\t\t# move old ESP to temp");  
+			///tty->print_cr( "\tAND    ESP,-8\t\t# Round ESP to even");
+			///tty->print_cr( "\tPUSH   EBX\t\t# Old ESP for EVEN alignment");
+			///tty->print   ( "\t" );
+			tty->print_cr("move		T8, SP");
+			tty->print_cr("move		AT, -8");
+			tty->print_cr("andr		SP, SP, AT");
+			tty->print_cr("sw			T8, SP, -4");
+			tty->print_cr("addiu	SP, SP, -4");
+		} else if( C->start()->Opcode() == Op_StartOSR ) {
+    	///tty->print_cr( "MOV    EBX,EDI\t\t# Move locals ptr to interpreter_arg_ptr_reg");
+    	///tty->print   ( "\t" );
+			// FIXME: I don't know the meaning of this code yet. by yjl 2/21/2006
+  	}
+
+		tty->print("addiu\tSP, SP, -%d\t# Create frame", framesize);
+	}
+#endif
+
+
+	void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+		Compile* C = ra_->C;
+		MacroAssembler masm(&cbuf);
+#define __ masm.
+
+		// WARNING: Initial instruction MUST be 16 bytes or longer so that
+		// NativeJump::patch_verified_entry will be able to patch out the entry code safely. 
+		/*if( C->in_24_bit_fp_mode() ) {
+			MacroAssembler masm(&cbuf);
+			Address cntrl_addr_24 = Address((int)StubRoutines::addr_fpu_cntrl_wrd_24(), relocInfo::none);
+			masm.fldcw(cntrl_addr_24);
+			}*/
+
+		int framesize = C->frame_slots() << LogBytesPerInt;
+		///framesize -= wordSize;      // Remove 1 for return adr already pushed
+		assert(framesize % (2*wordSize) == wordSize, "aligned frame size");
+
+		if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
+			///emit_opcode(cbuf, 0x68); // push 0xbadb100d
+			///emit_d32(cbuf, 0xbadb100d);
+			///framesize -= wordSize;  
+			__ move(AT, 0xbadb100d);
+			__ sw(AT, SP, -4);
+		}
+
+		// Calls to C2R adapters often do not accept exceptional returns.
+		// We require that their callers must bang for them.  But be careful, because
+		// some VM calls (such as call site linkage) can use several kilobytes of
+		// stack.  But the stack safety zone should account for that.
+		// See bugs 4446381, 4468289, 4497237.
+		if (C->need_stack_bang(framesize)) {
+			///MacroAssembler masm(&cbuf);
+			///masm.generate_stack_overflow_check(framesize);
+			__ generate_stack_overflow_check(framesize);
+		}
+
+		if( C->start()->Opcode() == Op_StartI2C) {
+			///emit_opcode(cbuf, 0x8B);             // MOV  reg,ESP
+			///emit_rm(cbuf, 0x3, EBX_enc, ESP_enc);// interpreter_arg_ptr_reg
+			///emit_opcode(cbuf,0x83);              // AND ESP,-8 ; Round ESP to even
+			///emit_rm(cbuf,0x3,0x4,ESP_enc);
+			///emit_d8(cbuf,-8);
+			///emit_opcode(cbuf,0x50+EBX_enc);      // PUSH EBX (old ESP)
+			__ move(T8, SP);
+			__ move(AT, -8);
+			__ andr(SP, SP, AT);
+			__ sw(T8, SP, -4);
+			//__ addiu(SP, SP -4);
+		} else if( C->start()->Opcode() == Op_StartOSR ) {
+			///emit_opcode(cbuf, 0x8B);             // MOV  
+			///emit_rm(cbuf, 0x3, EBX_enc, EDI_enc);// MOV EBX,EDI locals ptr to EBX
+			// FIXME: I don't know the meaning of this code yet. by yjl 2/21/2006
+		}
+
+
+		if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
+			if (framesize) {
+				///emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
+				///emit_rm(cbuf, 0x3, 0x05, ESP_enc);
+				///emit_d8(cbuf, framesize);
+				__ subiu(SP, SP, framesize);
+			}
+		} else {
+			///emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
+			///emit_rm(cbuf, 0x3, 0x05, ESP_enc); 
+			///emit_d32(cbuf, framesize);
+			__ subiu(SP, SP, framesize);
+		}
+#undef __
+	}
+
+	uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
+		return MachNode::size(ra_); // too many variables; just compute it the hard way
+	}
+
+	int MachPrologNode::reloc() const {
+		return 0; // a large enough number
+	}
+
+	//=============================================================================
+#ifndef PRODUCT
+	void MachEpilogNode::format( PhaseRegAlloc *ra_ ) const {
+		Compile *C = ra_->C;
+		int framesize = C->frame_slots() << LogBytesPerInt;
+		///framesize -= wordSize;      // Remove 1 for return adr already pushed
+		assert(framesize % (2*wordSize) == wordSize, "aligned frame size");
+
+		///if( C->in_24_bit_fp_mode() ) {
+		///  tty->print("FLDCW  standard control word");
+		///  tty->cr(); tty->print("\t");
+		///}
+		if( framesize ) {
+			///tty->print("ADD    ESP,%d\t# Destroy frame",framesize);
+			///tty->cr(); tty->print("\t");
+			tty->print_cr("addiu	SP, SP, %d", framesize);
+		}
+		if( C->start()->Opcode() == Op_StartI2C) {
+			///tty->print("POP    ESP\t\t# Recover prior ESP");
+			///tty->cr(); tty->print("\t");
+			tty->print_cr("lw			SP, SP, -4");
+		}
+		if( do_polling() && SafepointPolling && C->is_method_compilation() ) {
+			///tty->print("TEST  PollPage,EAX\t! Poll Safepoint");
+			///tty->cr(); tty->print("\t");
+			tty->print_cr("lw			ZERO, PollPage, 0");
+		}
+	}
+#endif
+
+	void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+		Compile *C = ra_->C;
+		MacroAssembler masm(&cbuf);
+#define __ masm.
+
+		// If method set FPU control word, restore to standard control word
+		///if( C->in_24_bit_fp_mode() ) {
+		///  MacroAssembler masm(&cbuf);
+		///  Address cntrl_addr_std  = Address((int)StubRoutines::addr_fpu_cntrl_wrd_std(), relocInfo::none); 
+		///  masm.fldcw(cntrl_addr_std);
+		///}
+
+		int framesize = C->frame_slots() << LogBytesPerInt;
+		///framesize -= wordSize;      // Remove 1 for return adr already pushed
+		assert(framesize % (2*wordSize) == wordSize, "aligned frame size");
+
+		///if( framesize >= 128 ) {
+		///emit_opcode(cbuf, 0x81); // add  SP, #framesize
+		///emit_rm(cbuf, 0x3, 0x00, ESP_enc);
+		///emit_d32(cbuf, framesize);
+		///}
+		///else if( framesize ) {
+		///  emit_opcode(cbuf, 0x83); // add  SP, #framesize
+		///  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
+		///  emit_d8(cbuf, framesize);
+		//}
+		__ addiu(SP, SP, framesize);
+
+		if( C->start()->Opcode() == Op_StartI2C) {
+			///emit_opcode(cbuf,0x58+ESP_enc); // POP ESP
+			__ lw(SP, SP, -4);
+		}
+
+		if( do_polling() && SafepointPolling && C->is_method_compilation() ) {
+			///cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
+			///emit_opcode(cbuf,0x85);
+			///emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
+			///emit_d32(cbuf, (intptr_t)os::get_polling_page());
+			__ relocate(relocInfo::poll_return_type);
+			__ lui(AT, Assembler::split_high((int)os::get_polling_page()));
+			__ lw(ZERO, AT, Assembler::split_low((int)os::get_polling_page()));
+		}
+#undef __
+	}
+
+	uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+		Compile *C = ra_->C;
+		int size = 4;
+
+		if (C->start()->Opcode() == Op_StartI2C) {
+			size += 4;
+		}
+
+		if ( do_polling() && SafepointPolling && C->is_method_compilation() ) {
+			size += 8;
+		}
+
+		return size;
+	}
+
+	int MachEpilogNode::reloc() const {
+		return 0; // a large enough number
+	}
+
+	const Pipeline * MachEpilogNode::pipeline() const {
+		return MachNode::pipeline_class();
+	}
+
+	int MachEpilogNode::safepoint_offset() const { return 0; }
+
+	//=============================================================================
+
+	enum RC { rc_bad, rc_int, rc_float, rc_stack };
+	static enum RC rc_class( OptoReg::Name reg ) {
+		if( reg == OptoReg::Bad ) return rc_bad;
+		if( reg <= RA_num ) return rc_int;
+		if( reg <= F31_num ) return rc_float;
+  
+		assert( reg >= SharedInfo::stack0, "blow up if spilling flags" );
+		return rc_stack;
+	}
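+
+	// For example (illustration only): spilling T0 to a stack slot yields
+	// rc_class(src_lo) == rc_int and rc_class(dst_lo) == rc_stack, which
+	// selects the "integer store" arm of MachSpillCopyNode::implementation
+	// below and emits a single 4-byte sw.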
+
+	// I don't think we need this. by yjl 2/21/2006
+	/*static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size ) {
+		if( cbuf ) {
+			emit_opcode  (*cbuf, opcode );
+			encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
+#ifndef PRODUCT
+		} else if( !do_size ) { 
+			if( size != 0 ) tty->print("\n\t"); 
+			if( is_load ) tty->print("%s   %s,[ESP + #%d]",op_str,SharedInfo::regName[reg],offset); 
+			else          tty->print("%s   [ESP + #%d],%s",op_str,offset,SharedInfo::regName[reg]); 
+#endif
+		}
+		int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
+		return size+3+offset_size;
+	}
+
+	// Helper for XMM registers.  Extra opcode bits, limited syntax.
+	static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 
+			int offset, int reg_lo, int reg_hi, int size ) {
+		if( cbuf ) {
+			if( reg_lo+1 == reg_hi ) { // double move?
+				emit_opcode  (*cbuf, 0xF2 );
+			} else {
+				emit_opcode  (*cbuf, 0xF3 );
+			}
+			emit_opcode  (*cbuf, 0x0F );
+			emit_opcode  (*cbuf, is_load ? 0x10 : 0x11 );
+			encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
+#ifndef PRODUCT
+		} else if( !do_size ) { 
+			if( size != 0 ) tty->print("\n\t"); 
+			if( reg_lo+1 == reg_hi ) { // double move?
+				if( is_load ) tty->print("MOVSD  %s:%s,[ESP + #%d]",SharedInfo::regName[reg_lo],SharedInfo::regName[reg_hi],offset); 
+				else          tty->print("MOVSD  [ESP + #%d],%s:%s",offset,SharedInfo::regName[reg_lo],SharedInfo::regName[reg_hi]); 
+			} else {
+				if( is_load ) tty->print("MOVSS  %s,[ESP + #%d]",SharedInfo::regName[reg_lo],offset); 
+				else          tty->print("MOVSS  [ESP + #%d],%s",offset,SharedInfo::regName[reg_lo]); 
+			}
+#endif
+		}
+		int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
+		return size+5+offset_size;
+	}
+
+	static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 
+			int src_hi, int dst_hi, int size ) {
+		if( cbuf ) {
+			emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
+			emit_opcode(*cbuf, 0x0F );
+			emit_opcode(*cbuf, 0x10 );
+			emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
+#ifndef PRODUCT
+		} else if( !do_size ) { 
+			if( size != 0 ) tty->print("\n\t"); 
+			if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
+				tty->print("MOVSD  %s:%s,%s:%s",SharedInfo::regName[dst_lo],SharedInfo::regName[dst_hi],SharedInfo::regName[src_lo],SharedInfo::regName[src_hi]); 
+			} else {
+				tty->print("MOVSS  %s,%s",SharedInfo::regName[dst_lo],SharedInfo::regName[src_lo]); 
+			}
+#endif
+		}
+		return size+4;
+	}
+
+	static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size ) {
+		if( cbuf ) {
+			emit_opcode(*cbuf, 0x8B );
+			emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
+#ifndef PRODUCT
+		} else if( !do_size ) { 
+			if( size != 0 ) tty->print("\n\t"); 
+			tty->print("MOV    %s,%s",SharedInfo::regName[dst],SharedInfo::regName[src]); 
+#endif
+		}
+		return size+2;
+	}
+
+	static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, int offset, int size ) {
+		if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
+			if( cbuf ) {
+				emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
+				emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
+#ifndef PRODUCT
+			} else if( !do_size ) { 
+				if( size != 0 ) tty->print("\n\t"); 
+				tty->print("FLD    %s",SharedInfo::regName[src_lo]);
+#endif
+			}
+			size += 2;
+		}
+
+		int st_op = (src_lo != FPR1L_num) ? EBX_num 
+			//store & pop : 
+			EDX_num 
+			//store no pop;
+		const char *op_str;
+		int op;
+		if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
+			op_str = (src_lo != FPR1L_num) ? "DSTP" : "DST ";
+			op = 0xDD;
+		} else {                   // 32-bit store
+			op_str = (src_lo != FPR1L_num) ? "FSTP" : "FST ";
+			op = 0xD9;
+			assert( src_hi == OptoReg::Bad && dst_hi == OptoReg::Bad, "no non-adjacent float-stores" );
+		}
+
+		return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size);
+	}*/
+
+	uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size ) const {
+		// Get registers to move 
+		OptoReg::Name src_hi = ra_->get_reg_hi(in(1));
+		OptoReg::Name src_lo = ra_->get_reg_lo(in(1));
+		OptoReg::Name dst_hi = ra_->get_reg_hi(this );
+		OptoReg::Name dst_lo = ra_->get_reg_lo(this );
+
+		enum RC src_hi_rc = rc_class(src_hi);
+		enum RC src_lo_rc = rc_class(src_lo);
+		enum RC dst_hi_rc = rc_class(dst_hi);
+		enum RC dst_lo_rc = rc_class(dst_lo);
+
+		assert( src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register" );
+
+		MacroAssembler *masm = NULL;
+#define __ masm->
+
+		if (cbuf) {
+			masm = new MacroAssembler(cbuf); 
+		}
+
+		// Generate spill code!
+		int size = 0;
+
+		if( src_lo == dst_lo && src_hi == dst_hi ) 
+			return size;            // Self copy, no move
+
+		// --------------------------------------
+		// Check for mem-mem move.  push/pop to move.
+		if( src_lo_rc == rc_stack && dst_lo_rc == rc_stack ) {
+			if( src_hi == dst_lo ) { // overlapping stack copy ranges
+				assert( src_hi_rc == rc_stack && dst_hi_rc == rc_stack, "we only expect a stk-stk copy here" );
+				///size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_hi),ESI_num,0xFF,"PUSH",size);
+				///size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_hi),EAX_num,0x8F,"POP ",size);
+				if (cbuf) {
+					__ lw(AT, SP, ra_->reg2offset(src_hi));
+					__ sw(AT, SP, ra_->reg2offset(dst_hi));
+#ifndef PRODUCT
+				} else {
+					if (!do_size) {
+						tty->print_cr("lw			AT, SP, %d", ra_->reg2offset(src_hi));
+						tty->print_cr("sw			AT, SP, %d", ra_->reg2offset(dst_hi));
+					}
+#endif
+				}
+				
+				size += 8;
+				src_hi_rc = dst_hi_rc = rc_bad;  // flag as already moved the hi bits
+			}
+			// move low bits
+			///size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_lo),ESI_num,0xFF,"PUSH",size);
+			///size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_lo),EAX_num,0x8F,"POP ",size);
+			if (cbuf) {
+				__ lw(AT, SP, ra_->reg2offset(src_lo));
+				__ sw(AT, SP, ra_->reg2offset(dst_lo));
+#ifndef PRODUCT
+			} else {
+				if (!do_size) {
+					tty->print_cr("lw			AT, SP, %d", ra_->reg2offset(src_lo));
+					tty->print_cr("sw			AT, SP, %d", ra_->reg2offset(dst_lo));
+				}
+#endif
+			}
+
+			size += 8;
+			
+			if( src_hi_rc == rc_stack && dst_hi_rc == rc_stack ) { // mov hi bits
+				///size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_hi),ESI_num,0xFF,"PUSH",size);
+				///size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_hi),EAX_num,0x8F,"POP ",size);
+				if (cbuf) {
+					__ lw(AT, SP, ra_->reg2offset(src_hi));
+					__ sw(AT, SP, ra_->reg2offset(dst_hi));
+#ifndef PRODUCT
+				} else {
+					if (!do_size) {
+						tty->print_cr("lw			AT, SP, %d", ra_->reg2offset(src_hi));
+						tty->print_cr("sw			AT, SP, %d", ra_->reg2offset(dst_hi));
+					}
+#endif
+				}
+				size += 8;
+			}
+		} else
+
+		// --------------------------------------
+		// Check for integer reg-reg copy
+		if( src_lo_rc == rc_int && dst_lo_rc == rc_int ) {
+			///size = impl_mov_helper(cbuf,do_size,src_lo,dst_lo,size);
+			if (cbuf) {
+				__ move(Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo]);
+				if (dst_hi_rc!=rc_bad) {
+					__ move(Matcher::_regEncode[dst_hi], Matcher::_regEncode[src_hi]);
+					size += 4;
+				}
+#ifndef PRODUCT
+			} else if (!do_size) {
+				tty->print_cr("move		%s, %s", SharedInfo::regName[dst_lo], SharedInfo::regName[src_lo]);
+				if (dst_hi_rc!=rc_bad) {
+					tty->print_cr("move		%s, %s", SharedInfo::regName[dst_hi], SharedInfo::regName[src_hi]);
+					size += 4;	
+				}
+#endif
+			}
+			size += 4;
+		} else
+
+		// Check for integer store
+		if( src_lo_rc == rc_int && dst_lo_rc == rc_stack ) {
+			///size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_lo),src_lo,0x89,"MOV ",size);
+			if (cbuf) {
+				__ sw(Matcher::_regEncode[src_lo], SP, ra_->reg2offset(dst_lo));
+				if (dst_hi_rc!=rc_bad) {
+					__ sw(Matcher::_regEncode[src_hi], SP, ra_->reg2offset(dst_hi));
+					size +=4;
+				}
+#ifndef PRODUCT
+			} else if (!do_size) {
+				tty->print_cr("sw			%s, %d(SP)", SharedInfo::regName[src_lo], ra_->reg2offset(dst_lo));
+				if (dst_hi_rc!=rc_bad) {
+					tty->print_cr("sw			%s, %d(SP)", SharedInfo::regName[src_hi], ra_->reg2offset(dst_hi));
+					size +=4;
+				}
+#endif
+			}
+			size += 4;
+		} else
+
+		// Check for integer load
+		if( dst_lo_rc == rc_int && src_lo_rc == rc_stack ) {
+			///size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_lo),dst_lo,0x8B,"MOV ",size);
+			if (cbuf) {
+				__ lw(Matcher::_regEncode[dst_lo], SP, ra_->reg2offset(src_lo));
+				if (dst_hi_rc!=rc_bad) {
+					__ lw(Matcher::_regEncode[dst_hi], SP, ra_->reg2offset(src_hi));
+					size +=4;
+				}
+#ifndef PRODUCT
+			} else if (!do_size) {
+				tty->print_cr("lw			%s, %d(SP)", SharedInfo::regName[dst_lo], ra_->reg2offset(src_lo));
+				if (dst_hi_rc!=rc_bad) {
+					tty->print_cr("lw			%s, %d(SP)", SharedInfo::regName[dst_hi], ra_->reg2offset(src_hi));
+					size +=4;
+				}
+#endif
+			}
+			size += 4;
+		} else
+
+		// --------------------------------------
+		// Check for float reg-reg copy
+		if( src_lo_rc == rc_float && dst_lo_rc == rc_float ) {
+			assert( (src_hi_rc == rc_bad && dst_hi_rc == rc_bad) ||
+					(src_lo+1 == src_hi && dst_lo+1 == dst_hi && (src_lo&1)==0 && (dst_lo&1)==0), "no non-adjacent float-moves" );
+			if( cbuf ) {   
+				///if( src_lo != FPR1L_num ) {
+				///	emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
+				///	emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_lo]-1 );
+				///	emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
+				///	emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_lo] );
+				///} else {
+				///	emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
+				///	emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_lo]-1 );
+				///}
+				
+				if (src_hi_rc==rc_bad) {
+					__ mfc0(AT, Matcher::_regEncode[src_lo]);
+					__ mtc0(AT, Matcher::_regEncode[dst_lo]);
+				} else {
+					__ dmfc0(AT, Matcher::_regEncode[src_lo]);
+					__ dmtc0(AT, Matcher::_regEncode[dst_lo]);
+				}
+#ifndef PRODUCT
+			} else if( !do_size ) { 
+				///if( size != 0 ) tty->print("\n\t"); 
+				///if( src_lo != FPR1L_num ) tty->print("FLD    %s\n\tFSTP   %s",SharedInfo::regName[src_lo],SharedInfo::regName[dst_lo]);
+				///else                      tty->print(             "FST    %s",                            SharedInfo::regName[dst_lo]);
+				if (src_hi_rc==rc_bad) {
+					tty->print_cr("mfc0		AT, %s", SharedInfo::regName[src_lo]);
+					tty->print_cr("mtc0   AT, %s", SharedInfo::regName[dst_lo]);
+				} else {
+					tty->print_cr("dmfc0	AT, %s", SharedInfo::regName[src_lo]);
+					tty->print_cr("dmtc0  AT, %s", SharedInfo::regName[dst_lo]);
+				}
+#endif
+			}
+			size += 8;
+		} else
+
+		// Check for float store
+		if( src_lo_rc == rc_float && dst_lo_rc == rc_stack ) {
+			///return impl_fp_store_helper(cbuf,do_size,src_lo,src_hi,dst_lo,dst_hi,ra_->reg2offset(dst_lo),size);
+			assert( (src_hi_rc == rc_bad && dst_hi_rc == rc_bad) ||
+					(src_lo+1 == src_hi && dst_lo+1 == dst_hi && (src_lo&1)==0 && (dst_lo&1)==0), "no non-adjacent float-moves" );
+			if( cbuf ) {   
+					__ swc1(Matcher::_regEncode[src_lo], SP, ra_->reg2offset(dst_lo));
+					size += 4;
+					if (src_hi_rc!=rc_bad) {
+						__ swc1(Matcher::_regEncode[src_hi], SP, ra_->reg2offset(dst_hi));
+						size += 4;
+					}
+#ifndef PRODUCT
+			} else if(!do_size) {
+				tty->print_cr("swc1		%s, %d(SP)", SharedInfo::regName[src_lo], ra_->reg2offset(dst_lo));
+				size+=4;
+				if (src_hi_rc!=rc_bad) {
+					tty->print_cr("swc1		%s, %d(SP)", SharedInfo::regName[src_hi], ra_->reg2offset(dst_hi));
+					size +=4;
+				}
+#endif
+			} else {
+				if (src_hi_rc!=rc_bad) size+=8;
+				else size+=4;
+			}
+		} else
+
+		// Check for float load
+		if( dst_lo_rc == rc_float && src_lo_rc == rc_stack ) {
+			assert( (src_hi_rc == rc_bad && dst_hi_rc == rc_bad) ||
+					(src_lo+1 == src_hi && dst_lo+1 == dst_hi && (src_lo&1)==0 && (dst_lo&1)==0), "no non-adjacent float-moves" );
+			if( cbuf ) {   
+					__ lwc1(Matcher::_regEncode[dst_lo], SP, ra_->reg2offset(src_lo));
+					size += 4;
+					if (src_hi_rc!=rc_bad) {
+						__ lwc1(Matcher::_regEncode[dst_hi], SP, ra_->reg2offset(src_hi));
+						size += 4;
+					}
+#ifndef PRODUCT
+			} else if(!do_size) {
+				tty->print_cr("lwc1		%s, %d(SP)", SharedInfo::regName[dst_lo], ra_->reg2offset(src_lo));
+				size+=4;
+				if (src_hi_rc!=rc_bad) {
+					tty->print_cr("lwc1		%s, %d(SP)", SharedInfo::regName[dst_hi], ra_->reg2offset(src_hi));
+					size +=4;
+				}
+#endif
+			} else {
+				if (src_hi_rc!=rc_bad) size+=8;
+				else size+=4;
+			}
+		}
+
+#undef __
+		if (cbuf) {
+			delete masm;
+		}
+
+		assert( size > 0, "missed a case" );
+
+		return size;
+	}
+
+#ifndef PRODUCT
+	void MachSpillCopyNode::format( PhaseRegAlloc *ra_ ) const {
+		implementation( NULL, ra_, false );
+	}
+#endif
+
+	void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+		implementation( &cbuf, ra_, false );
+	}
+
+	uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+		return implementation( NULL, ra_, true );
+	}
+
+	//=============================================================================
+#ifndef PRODUCT
+	void MachNopNode::format( PhaseRegAlloc * ) const {
+		tty->print("NOP    # Pad for loops and calls");
+	}
+#endif
+
+	void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
+		///emit_opcode(cbuf, 0x90);      // nop 
+		MacroAssembler masm(cbuf);
+		masm.nop();
+	}
+
+	uint MachNopNode::size(PhaseRegAlloc *) const {
+		return 4;
+	}
+
+
+	//=============================================================================
+#ifndef PRODUCT
+	void BoxLockNode::format( PhaseRegAlloc *ra_ ) const {
+		int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+		int reg = ra_->get_reg_lo(this);
+		///tty->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
+		tty->print_cr("addiu	%s, SP, %d", Matcher::regName[reg], offset);
+	}
+#endif
+
+	void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+		int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+		int reg = ra_->get_encode(this);
+		///if( offset >= 128 ) {
+		///  emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
+		///  emit_rm(cbuf, 0x2, reg, 0x04);
+		///  emit_rm(cbuf, 0x0, 0x04, ESP_enc);
+		///  emit_d32(cbuf, offset);
+		///}
+		///else {
+		///  emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
+		///  emit_rm(cbuf, 0x1, reg, 0x04);
+		///  emit_rm(cbuf, 0x0, 0x04, ESP_enc);
+		///  emit_d8(cbuf, offset);
+		///}
+		MacroAssembler masm(&cbuf);
+		masm.addiu(reg, SP, offset);
+	}
+
+	uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+		///int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+		///if( offset >= 128 ) {
+		///  return 7;
+		///}
+		///else {
+		///  return 4;
+		///}
+		return 4;
+	}
+
+	//=============================================================================
+
+	// what is the meaning of setting a static_stub_Relocation ahead? by yjl 2/22/2006
+	// emit call stub, compiled java to interpreter
+	void emit_java_to_interp(CodeBuffer &cbuf ) {
+		// Stub is fixed up when the corresponding call is converted from calling
+		// compiled code to calling interpreted code.
+		// mov ebx,0
+		// jmp -1
+
+		cbuf.start_a_stub();
+		MacroAssembler masm(&cbuf);
+#define __ masm.
+		// static stub relocation stores the instruction address of the call
+		///cbuf.relocate(cbuf.code_end(), 
+		///      static_stub_Relocation::spec(cbuf.mark()), RELOC_IMM32);
+		// static stub relocation also tags the methodOop in the code-stream.
+		///cbuf.relocate(cbuf.code_end(), 
+		///      oop_Relocation::spec_for_immediate(), RELOC_IMM32);
+		///emit_opcode(cbuf, 0xB8 | EAX_enc); // mov EAX, method
+		///emit_d32(cbuf,0);               // method is zapped till fixup time
+		///cbuf.set_mark();
+		///emit_opcode(cbuf, 0xE9);        // jmp    entry
+		///emit_d32_reloc(cbuf, -1 -(int)cbuf.code_end()-4, 
+		///             runtime_call_Relocation::spec(), RELOC_IMM32 );
+
+		// Update current stubs pointer and restore code_end.
+		int oop_index = __ oop_recorder()->find_index(c->as_jobject());
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(static_stub_Relocation::spec(cbuf.mark()));
+		__ relocate(rspec);
+		__ lui(T7, 0);
+		__ addiu(T7, T7, 0);
+		cbuf.set_mark();
+		__ relocate(runtime_call_Relocation::spec());
+		__ lui(T9, Assembler::split_high(-1));
+		__ addiu(T9, T9, Assembler::split_low(-1));
+		__ jr(T9);
+		__ delayed()->nop();
+
+
+#undef __
+		cbuf.end_a_stub();
+	}
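+	// The stub emitted above is, roughly:
+	//   lui   T7, 0              # methodOop, zapped till fixup time
+	//   addiu T7, T7, 0
+	//   lui   T9, %hi(-1)        # jump target, also patched at fixup time
+	//   addiu T9, T9, %lo(-1)
+	//   jr    T9
+	//   nop                      # branch delay slot
+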
+	// size of call stub, compiled java to interpreter
+	uint size_java_to_interp() {
+		return 20;
+	}
+	// relocation entries for call stub, compiled java to interpreter
+	uint reloc_java_to_interp() {
+		return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
+	}
+
+	//=============================================================================
+#ifndef PRODUCT
+	void MachUEPNode::format( PhaseRegAlloc *ra_ ) const {
+		///tty->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
+		///tty->print_cr("\tJNE    OptoRuntime::handle_ic_miss_stub");
+		///tty->print_cr("\tNOP");
+		///tty->print_cr("\tNOP");
+		tty->print_cr("\tlw		AT, %d(%s)", oopDesc::klass_offset_in_bytes(), RECEIVER->name());
+		tty->print_cr("\tbeq	AT, %s, L", IC_Klass->name());
+		tty->print_cr("\tnop");
+		tty->print_cr("\tjmp	OptoRuntime::handle_ic_miss_stub");
+		tty->print_cr("\tnop");
+		tty->print_cr("L:");
+		if( !OptoBreakpoint )
+			tty->print_cr("nop");
+	}
+#endif
+
+	void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+		MacroAssembler masm(&cbuf);
+#define __ masm.
+#ifdef ASSERT
+		uint code_size = cbuf.code_size();
+#endif
+		///masm.cmpl(eax, Address(ecx, oopDesc::klass_offset_in_bytes()));
+		Label L;
+		///masm.jcc(Assembler::notEqual, OptoRuntime::handle_ic_miss_stub(), relocInfo::runtime_call_type);
+		__ lw(AT, RECEIVER, oopDesc::klass_offset_in_bytes());
+		__ beq(AT, IC_Klass, L);
+		__ delayed()->nop();
+		__ jmp(OptoRuntime::handle_ic_miss_stub(), relocInfo::runtime_call_type);
+		__ delayed()->nop();
+		__ bind(L);
+		/* WARNING these NOPs are critical so that verified entry point is properly
+			 aligned for patching by NativeJump::patch_verified_entry() */
+		// no need now for godson2. by yjl 2/22/2006
+		///masm.nop();
+		///masm.nop();
+		if( !OptoBreakpoint ) // Leave space for int3
+			///   masm.nop();
+			__ nop();
+
+		assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node");
+	}
+
+	uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
+		return OptoBreakpoint ? 20 : 24;
+	}
+
+	uint offset_start_of_table() {
+		return 0;
+	}
+
+//=============================================================================
+#ifndef PRODUCT
+	void MachC2IEntriesNode::format( PhaseRegAlloc *ra_ ) const {
+		int ic_reg  = Matcher::inline_cache_reg();
+		int rec_reg = Matcher::compiler_method_oop_reg();
+		const char *ic_name  = Matcher::regName[ic_reg];
+		const char *rec_name = Matcher::regName[rec_reg];
+		const char *fp_name  = "FP";
+
+		tty->print_cr("------ MKH Unverified Entry Point");
+		int disp = oopDesc::klass_offset_in_bytes();
+		// Access receiver klass: this->klass
+		///tty->print_cr( "\tMOV    %s,[%s+%d]\t# Receiver klass", tmp_name, rec_name, disp);
+		tty->print_cr("\tlw\t\tAT, %d(%s)", disp, rec_name);
+		disp = compiledICHolderOopDesc::holder_klass_offset();
+		///tty->print_cr( "\tCMP    %s,[%s+compiledICHolderOopDesc::holder_klass_offset()  %d]", tmp_name, ic_name, disp);
+		tty->print_cr("\tlw\t\tT8, %d(%s)", disp, ic_name);
+		tty->print_cr("\tbne\t\tAT, T8, OptoRuntime::handle_ic_miss_stub");
+
+		// Unpack compiledIC                 
+		disp = compiledICHolderOopDesc::holder_method_offset();
+		///tty->print_cr( "\tMOV    %s,[%s+compiledICHolderOopDesc::holder_method_offset() %d]", ic_name, ic_name, disp);
+		tty->print_cr("\tlw\t\t%s, %d(%s)", ic_name, disp, ic_name);
+
+		// Jump to inline cache miss fixup if check fails                 
+		///tty->print_cr( "\tJNE    OptoRuntime::handle_ic_miss_stub");
+
+		tty->print_cr( "------ Std Verified Entry Point");
+	}
+#endif
+
+	void MachC2IEntriesNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+		int ic_reg  = Matcher::inline_cache_reg();
+		int rec_reg = Matcher::compiler_method_oop_reg();
+		int ic_encode  = Matcher::_regEncode[ic_reg];
+		int rec_encode = Matcher::_regEncode[rec_reg];
+		int tmp_encode = Matcher::_regEncode[tmp_reg];
+
+		// !!!!!
+		// Check that the registers are distinct, check ic_reg != rec_reg
+		// checked in ADLC
+		assert( ic_reg != rec_reg, "registers must be distinct");
+		assert( ic_reg != tmp_reg, "registers must be distinct");
+		// Check that these registers are caller-saved
+		assert( register_save_policy[ic_reg] == 'C' || 
+				register_save_policy[ic_reg] == 'A',
+				"This register must be caller-saved or always-saved.\n");
+		assert( register_save_policy[rec_reg] == 'C' || 
+				register_save_policy[rec_reg] == 'A',
+				"This register must be caller-saved or always-saved.\n");
+		assert( register_save_policy[tmp_reg] == 'C' || 
+				register_save_policy[tmp_reg] == 'A',
+				"This register must be caller-saved or always-saved.\n");
+
+		MacroAssembler masm(&cbuf);
+#define __ masm.
+		Label L;
+
+		// ------ MKH Entry Point, Unverified
+		// Receives the MethodKlassHolder in inline_cache_reg
+
+		// size 13+6
+		// Access "this" pointer from stack
+
+		// Access receiver klass: this->klass
+		int disp     = oopDesc::klass_offset_in_bytes();
+		///assert( -128 <= disp && disp <= 127, "klass_offset_in_bytes is small");
+		///emit_opcode(cbuf, 0x8B);   // MOV tmp_reg,[rec_reg+klass_offset_in_bytes]
+		///emit_rm(cbuf, 0x01, tmp_encode, rec_encode ); // R/M byte
+		///emit_d8(cbuf, disp);              // Displacement
+		__ lw(AT, rec_encode, disp);
+		
+
+		// Compare this->klass, in rec_reg, with inline_cached_klass
+		disp     = compiledICHolderOopDesc::holder_klass_offset();
+		///assert( -128 <= disp && disp <= 127, "holder_klass_offset is small displacement");
+		///emit_opcode(cbuf, 0x3B);  // CMP tmp_reg,[ic_reg+holder_klass_offset]
+		///emit_rm(cbuf, 0x01, tmp_encode, ic_encode );  // R/M byte
+		///emit_d8(cbuf, disp );              // Displacement
+		__ lw(T8, ic_encode, disp);
+
+		__ beq(AT, T8, L);
+		__ delayed()->nop();
+
+		// Access method_oop from compiledICHolder
+		disp     = compiledICHolderOopDesc::holder_method_offset();
+		///assert( -128 <= disp && disp <= 127, "holder_method_offset is small");
+		///emit_opcode(cbuf, 0x8B);     // MOV    ic_reg,[ic_reg+holder_method_offset]
+		///emit_rm(cbuf, 0x01, ic_encode, ic_encode ); // R/M byte
+		///emit_d8(cbuf, disp);              // Displacement
+
+		// i dont think we need the runtime call relocation.
+		// FIXME by yjl 2/24/2005
+		__ lui(T9, Assembler::split_high((int)OptoRuntime::handle_ic_miss_stub()));
+		__ addiu(T9, T9, Assembler::split_low((int)OptoRuntime::handle_ic_miss_stub()));
+		__ jr(T9);
+		__ delayed()->lw(ic_encode, ic_encode, disp);
+
+		__ bind(L);
+
+		///cbuf.set_mark();
+		///emit_opcode(cbuf, 0x0F);           // JNE    FIXUP
+		///emit_opcode(cbuf, 0x85);
+		// Grab address for fixup branch in unvalidated entry
+		///address addr = OptoRuntime::handle_ic_miss_stub();
+		///emit_d32_reloc(cbuf, addr - cbuf.code_end()-sizeof(int32), 
+		///		runtime_call_Relocation::spec(), RELOC_IMM32 );
+
+		// ------ Std Verified Entry Point
+		// Receives a method oop in inline_cache_reg  
+#undef __
+	}
+
+	uint MachC2IEntriesNode::size(PhaseRegAlloc *ra_) const {
+		return 32;
+	}
+
+	//=============================================================================
+
+#ifndef PRODUCT
+	void MachC2IcheckICNode::format( PhaseRegAlloc *ra_ ) const {
+		// get register. Inline cache register will contain methodOop at this point. compiler_method_oop_reg is
+		// used temporarily.
+		int method_oop = Matcher::inline_cache_reg();
+
+		const char *method_oop_name  = Matcher::regName[method_oop];
+
+		tty->print_cr( "------ checkIC ------");
+		int disp = in_bytes(methodOopDesc::compiled_code_offset());
+		///tty->print_cr( "\tMOV    %s,[%s+in_bytes(methodOopDesc::compiled_code_offset()) %d]", temp_name, method_oop_name, disp);
+		///tty->print_cr( "\tTEST   %s, %s\t# code exists?" , temp_name, temp_name);
+		///tty->print_cr( "\tJNE    OptoRuntime::handle_wrong_method_stub()");
+		tty->print_cr("\tlw\t\tAT, %d(%s)", disp, method_oop_name);
+		tty->print_cr("\tbeqz\tAT, OptoRuntime::handle_wrong_method_stub()");
+	}
+#endif
+
+	void MachC2IcheckICNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {  
+		int method_oop_reg     = Matcher::inline_cache_reg();
+		int method_oop_encode  = Matcher::_regEncode[method_oop_reg];
+
+		MacroAssembler masm(&cbuf);
+#define __ masm.
+
+		Label L;
+
+		// Access code field from methodOop
+		int disp = in_bytes(methodOopDesc::compiled_code_offset());
+		///assert( -128 <= disp && disp <= 127, "code offset to big");
+		// MOV    temp_reg,[method_oop_reg+methodOop::compiled_code_offset_in_bytes]
+		///emit_opcode(cbuf, 0x8B);
+		///emit_rm(cbuf, 0x01, temp_encode, method_oop_encode ); // R/M byte
+		///emit_d8(cbuf, disp);              // Displacement
+		__ lw(AT, method_oop_encode, disp);
+		__ bne(AT, ZERO, L);
+		__ delayed()->nop();
+
+
+		// TEST temp_reg, temp_reg
+		///emit_opcode(cbuf, 0x85);
+		///emit_rm(cbuf, 0x03 ,temp_encode, temp_encode);
+
+		// jne clear_ic_stub()
+		///cbuf.set_mark();
+		///emit_opcode(cbuf, 0x0F);           
+		///emit_opcode(cbuf, 0x85);
+		// Grab address for fixup branch in unvalidated entry
+		///address addr = OptoRuntime::handle_wrong_method_stub();
+		///emit_d32_reloc(cbuf, addr - cbuf.code_end()-sizeof(int32), 
+		///              runtime_call_Relocation::spec(), RELOC_IMM32 );  
+		__ lui(T9, Assembler::split_high((int)OptoRuntime::handle_wrong_method_stub()));
+		__ addiu(T9, T9, Assembler::split_low((int)OptoRuntime::handle_wrong_method_stub()));
+		__ jr(T9);
+		__ delayed()->nop();
+
+		__ bind(L);
+#undef __
+	}
+
+	uint MachC2IcheckICNode::size(PhaseRegAlloc *ra_) const {
+		return 28;
+	}
+
+	//=============================================================================
+	// Emit exception handler code.  Stuff framesize into a register
+	// and call a VM stub routine.
+	void emit_exception_handler( CodeBuffer &cbuf ) {
+		MacroAssembler masm(&cbuf);
+#define __ masm.
+
+		// Lazy deopt bug 4932387. If last instruction is a call then we
+		// need an area to patch where we won't overwrite the exception
+		// handler. This means we need some padding (four nops here).
+		///for (int i = 0; i < NativeCall::instruction_size ; i++ ) {
+		///	emit_opcode(cbuf, 0x90);
+		///}
+		__ nop();
+		__ nop();
+		__ nop();
+		__ nop();
+
+		// Now mark the functional start of the exception handler
+		cbuf.set_exception_offset(cbuf.code_size());
+		///cbuf.set_mark();
+		///emit_opcode(cbuf, 0xE9);        // jmp    entry
+		///emit_d32_reloc(cbuf, ((int)OptoRuntime::exception_blob()->instructions_begin()) - ((int)cbuf.code_end())-4, 
+		///		runtime_call_Relocation::spec(), RELOC_IMM32 );
+		__ lui(T9, Assembler::split_high((int)OptoRuntime::exception_blob()));
+		__ addiu(T9, T9, Assembler::split_low((int)OptoRuntime::exception_blob()));
+		__ jr(T9);
+		__ delayed()->nop();
+#undef __
+	}
+
+	uint size_exception_handler() {
+		// NativeCall instruction size is the same as NativeJump.
+		// exception handler starts out as jump and can be patched to
+		// a call by deoptimization. The *2 is because of the padding
+		// we need to make sure that deopt patches don't accidentally
+		// overwrite patched exception handler (4932387)
+		///return 2*NativeCall::instruction_size;
+		return 32;
+	}
+
+	int Matcher::regnum_to_fpu_offset(int regnum) {
+		return regnum - 32; // The FP registers are in the second chunk
+	}
+
+	bool is_positive_zero_float(jfloat f) {
+		return jint_cast(f) == jint_cast(0.0F);
+	}
+
+	bool is_positive_one_float(jfloat f) {
+		return jint_cast(f) == jint_cast(1.0F);
+	}
+
+	bool is_positive_zero_double(jdouble d) {
+		return jlong_cast(d) == jlong_cast(0.0);
+	}
+
+	bool is_positive_one_double(jdouble d) {
+		return jlong_cast(d) == jlong_cast(1.0);
+	}
+
+	// JumpTable support
+	const bool Matcher::jumpTableSupported(void) {
+		return false;
+	}
+
+	// This is UltraSparc specific, true just means we have fast l2f conversion
+	const bool Matcher::convL2FSupported(void) {
+		return true;
+	}
+
+	// Is this branch offset short enough that a short branch can be used?
+	//
+	// NOTE: If the platform does not provide any short branch variants, then
+	//       this method should return false for offset 0.
+	///bool Matcher::is_short_branch_offset(int offset) {
+	///	return (-128 <= offset && offset <= 127);
+	///}
+
+	// Should the Matcher clone shifts on addressing modes, expecting them to
+	// be subsumed into complex addressing expressions or compute them into
+	// registers?  True for Intel but false for most RISCs
+	const bool Matcher::clone_shift_expressions = false;
+
+	// Is it better to copy float constants, or load them directly from memory?
+	// Intel can load a float constant from a direct address, requiring no
+	// extra registers.  Most RISCs will have to materialize an address into a
+	// register first, so they would do better to copy the constant from stack.
+	const bool Matcher::rematerialize_float_constants = false;
+
+	// If CPU can load and store mis-aligned doubles directly then no fixup is 
+	// needed.  Else we split the double into 2 integer pieces and move it 
+	// piece-by-piece.  Only happens when passing doubles into C code as the 
+	// Java calling convention forces doubles to be aligned.
+	const bool Matcher::misaligned_doubles_ok = false;
+
+
+	// what the hell does this mean? just leave it as-is for now
+	// by yjl 2/24/2006
+	void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
+		// Get the memory operand from the node
+		uint numopnds = node->num_opnds();        // Virtual call for number of operands
+		uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
+		assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 
+		uint opcnt     = 1;                 // First operand
+		uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 
+		while( idx >= skipped+num_edges ) {
+			skipped += num_edges;
+			opcnt++;                          // Bump operand count
+			assert( opcnt < numopnds, "Accessing non-existent operand" );
+			num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
+		}
+
+		MachOper *memory = node->_opnds[opcnt];
+		MachOper *new_memory = NULL;
+		switch (memory->opcode()) {
+			case DIRECT:
+			case INDOFFSET32X:
+				// No transformation necessary.
+				return;
+			case INDIRECT:
+				new_memory = new indirect_win95_safeOper( );
+				break;
+			case INDOFFSET8:
+				new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
+				break;
+			case INDOFFSET32:
+				new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
+				break;
+			case INDINDEXOFFSET:
+				new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
+				break;
+			case INDINDEXSCALE:
+				new_memory = new indIndexScale_win95_safeOper(memory->scale());
+				break;
+			case INDINDEXSCALEOFFSET:
+				new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
+				break;
+			case LOAD_LONG_INDIRECT:
+			case LOAD_LONG_INDOFFSET32:
+				// Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
+				return;
+			default:
+				assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
+				return;
+		}
+		node->_opnds[opcnt] = new_memory;
+	}
+
+	// Advertise here if the CPU requires explicit rounding operations
+	// to implement the UseStrictFP mode.
+	const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+	// Do floats take an entire double register or just half?
+	const bool Matcher::float_in_double = true;
+	// Do ints take an entire long register or just half?
+	const bool Matcher::int_in_long = false;
+
+
+	// What is the range of offsets for allocator spill instructions?  
+	// Offsets larger than this will encode to a 'large' instruction and 
+	// offsets same size or smaller will encode to a 'small' instruction.
+	// On Sparc the 'small' offset is from 0 to 4096; offsets larger than
+	// this will not have any sane encoding (there's no spare register to
+	// build up a large offset).  However, 4096 should be plenty large 
+	// enough.  On Intel the 'small' offset is from 0 to 127; 'large' offsets
+	// are +128 on up.  The allocator will match both large and small versions
+	// of load/store [SP+offset] instructions, and will clone such instructions
+	// in fixup_spills and patch in the correct offset.
+	///const int Matcher::short_spill_offset_limit = 128;
+
+	// Return whether or not this register is ever used as an argument.  This
+	// function is used on startup to build the trampoline stubs in generateOptoStub.  
+	// Registers not mentioned will be killed by the VM call in the trampoline, and 
+	// arguments in those registers will not be available to the callee.
+	bool Matcher::can_be_arg( int reg ) {
+		///f(  reg == ECX_num   || reg == EDX_num   ) return true;
+		///if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
+		///if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE==2 ) return true;
+		if (reg>=A0_num && reg<=A3_num) return true;
+		if (reg>=F12_num && reg<=F15_num) return true;
+		return false;
+	}
+
+	bool Matcher::is_spillable_arg( int reg ) {
+		return can_be_arg(reg);
+	}
+%}
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to output
+// byte streams.  Encoding classes generate functions which are called by
+// Machine Instruction Nodes in order to generate the bit encoding of the
+// instruction.  Operands specify their base encoding interface with the
+// interface keyword.  Four interfaces are currently supported:
+// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.  REG_INTER causes an
+// operand to generate a function which returns its register number when
+// queried.   CONST_INTER causes an operand to generate a function which
+// returns the value of the constant when queried.  MEMORY_INTER causes an
+// operand to generate four functions which return the Base Register, the
+// Index Register, the Scale Value, and the Offset Value of the operand when
+// queried.  COND_INTER causes an operand to generate six functions which
+// return the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional instruction.
+// Instructions specify two basic values for encoding.  They use the
+// ins_encode keyword to specify their encoding class (which must be one of
+// the class names specified in the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode.  Only the opcode sections which a particular instruction
+// needs for encoding need to be specified.
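+//
+// Illustrative sketch (commented out, not part of this port): it shows how the
+// pieces described above fit together for a hypothetical MIPS add-immediate.
+// eRegI, immI16 and ialu_reg_reg are real definitions from later in this file;
+// addiu_ex, addI_imm_ex and emit_addiu are made-up names, and emit_addiu would
+// have to be a helper defined in a source block.  Because immI16 uses
+// CONST_INTER, $imm$$constant returns the constant's value; because eRegI uses
+// REG_INTER, $dst$$reg and $src$$reg return register numbers.
+//
+//   enc_class addiu_ex( eRegI dst, eRegI src, immI16 imm ) %{
+//     // $primary is the value supplied by the instruct's opcode() below
+//     emit_addiu(cbuf, $primary, $dst$$reg, $src$$reg, $imm$$constant);
+//   %}
+//
+//   instruct addI_imm_ex( eRegI dst, eRegI src, immI16 imm ) %{
+//     match(Set dst (AddI src imm));
+//     format %{ "addiu  $dst,$src,$imm" %}
+//     opcode(0x09);                       // MIPS addiu major opcode
+//     ins_encode( addiu_ex(dst, src, imm) );
+//     ins_pipe( ialu_reg_reg );
+//   %}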
+encode %{
+	enc_class orri( memory mem, iRegI dst ) %{
+		emit_orri(cbuf, this, $primary, $tertiary,
+		          $mem$$base, $mem$$disp, $mem$$index, $dst$$reg);
+	%}
+%}
+
+
+//---------mFRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+//  S T A C K   L A Y O U T    Allocators stack-slot number
+//                             |   (to get allocators register number
+//  G  Owned by    |        |  v    add SharedInfo::stack0)
+//  r   CALLER     |        |
+//  o     |        +--------+      pad to even-align allocators stack-slot 
+//  w     V        |  pad0  |        numbers; owned by CALLER
+//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
+//  h     ^        |   in   |  5   
+//        |        |  args  |  4   Holes in incoming args owned by SELF
+//  |     |    old |        |  3
+//  |     |     SP-+--------+----> Matcher::_old_SP, even aligned
+//  v     |        |  ret   |  3   return address
+//     Owned by    +--------+
+//      Self       |  pad2  |  2   pad to align old SP
+//        |        +--------+  1
+//        |        | locks  |  0
+//        |        +--------+----> SharedInfo::stack0, even aligned  
+//        |        |  pad1  | 11   pad to align new SP
+//        |        +--------+
+//        |        |        | 10
+//        |        | spills |  9   spills
+//        V        |        |  8   (pad0 slot for callee)
+//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
+//        ^        |  out   |  7   
+//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
+//   Owned by  new |        |
+//    Callee    SP-+--------+----> Matcher::_new_SP, even aligned
+//                 |        |
+//
+// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is 
+//         known from SELF's arguments and the Java calling convention.
+//         Region 6-7 is determined per call site.
+// Note 2: If the calling convention leaves holes in the incoming argument 
+//         area, those holes are owned by SELF.  Holes in the outgoing area
+//         are owned by the CALLEE.  Holes should not be necessary in the
+//         incoming area, as the Java calling convention is completely under
+//         the control of the AD file.  Doubles can be sorted and packed to
+//         avoid holes.  Holes in the outgoing arguments may be necessary for
+//         varargs C calling conventions.
+// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is 
+//         even aligned with pad0 as needed.
+//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
+//         region 6-11 is even aligned; it may be padded out more so that
+//         the region from SP to FP meets the minimum stack alignment.
+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
+//         alignment.  Region 11, pad1, may be dynamically extended so that
+//         SP meets the minimum alignment.
+
+frame %{
+  stack_direction(TOWARDS_LOW);
+
+  // These three registers define part of the calling convention 
+  // between compiled code and the interpreter.
+	// SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention 
+	// for more information. by yjl 3/16/2006
+  inline_cache_reg(IC_Klass);          // Inline Cache Register or methodOop for I2C
+  interpreter_arg_ptr_reg(A0);         // Argument pointer for I2C adapters
+  compiler_method_oop_reg(RECEIVER);   // Temporary in compiled entry-points
+  interpreter_method_oop_reg(T7);      // Method Oop Register when calling interpreter
+
+  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
+  ///cisc_spilling_operand_name(indOffset32);  
+
+  // Number of stack slots consumed by locking an object
+	// generate Compile::sync_stack_slots
+  sync_stack_slots(1);
+
+  frame_pointer(SP);
+  // Interpreter stores its frame pointer in a register which is 
+  // stored to the stack by I2CAdaptors.
+  // I2CAdaptors convert from interpreted java to compiled java.
+  interpreter_frame_pointer(FP);
+
+	// generate Matcher::stack_alignment
+  stack_alignment(6);            // Log of alignment size in bits (64-bit -> 6)
+
+  // Number of stack slots between incoming argument block and the start of 
+  // a new frame.  The PROLOG must add this many slots to the stack.  The
+  // EPILOG must remove this many slots.  Intel needs one slot for
+  // return address.
+	// generate Matcher::in_preserve_stack_slots
+  in_preserve_stack_slots(VerifyStackAtCalls);
+
+  // Number of stack slots reserved just above SELF's SP.
+  // After a call, these remain between outgoing parameters and callee's frame.
+  out_preserve_stack_slots(0);
+
+  // Number of outgoing stack slots killed above the out_preserve_stack_slots
+  // for calls to C.  Supports the var-args backing area for register parms.
+  varargs_C_out_slots_killed(0);
+
+  // The after-PROLOG location of the return address.  Location of
+  // return address specifies a type (REG or STACK) and a number
+  // representing the register number (i.e. - use a register name) or
+  // stack slot.
+  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
+  // Otherwise, it is above the locks and verification slot and alignment word
+  return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong));
+
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots.  Passed an array
+  // of ideal registers called "sig" and a "length" count.  Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE.  Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+	
+	// This block is generated as Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing).
+	// StartNode::calling_convention calls this. by yjl 3/16/2006
+  calling_convention %{           
+    uint    stack = 0;          // Starting stack position for args on stack
+    int ireg=A0_num, freg = F12_num;
+
+    // Now pick where all else goes.
+    for( uint i = 0; i < length; i++) {
+      // From the type and the argument number (count) compute the location
+      switch( sig[i].ideal_reg() ) {
+      case Op_RegI:
+      case Op_RegP:
+        if( stack<4 )  {
+          sig[i].set1(ireg++); stack++; freg++;
+        } else {
+          sig[i].set1(SharedInfo::stack2reg(stack++));
+        }
+        break;
+      case Op_RegF:
+        if( stack<4 ) {
+          sig[i].set1(freg++); stack++; ireg++;
+        } else {
+          sig[i].set1(SharedInfo::stack2reg(stack++));
+        }
+        break;
+      case Op_RegL:
+        // align first
+        if ( stack%2 ) {
+          stack++; ireg++; freg++;
+        }
+        if ( stack<4 ) {
+          sig[i].set2(ireg); ireg+=2; freg+=2; stack+=2;
+        } else {
+          sig[i].set2(SharedInfo::stack2reg(stack)); stack+=2;
+        }
+        break;
+      case Op_RegD:
+        // align first
+        if ( stack%2 ) {
+          stack++; ireg++; freg++;
+        }
+        if ( stack<4 ) {
+          sig[i].set2(freg); ireg+=2; freg+=2; stack+=2;
+        } else {
+          sig[i].set2(SharedInfo::stack2reg(stack)); stack+=2;
+        }
+        break;
+      case 0: sig[i].set_bad(); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+
+  %}
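+
+  // Worked example of the assignment implemented by the loop above (derived
+  // from the code itself, not from an external ABI document).  For a Java
+  // signature (int, long, float, double), starting with stack=0, ireg=A0,
+  // freg=F12:
+  //   int    -> A0               (stack becomes 1; F12 is consumed as well)
+  //   long   -> A2:A3 pair       (stack is first aligned to 2, then becomes 4)
+  //   float  -> stack slot 4     (stack has already reached the limit of 4)
+  //   double -> stack slots 6:7  (aligned past slot 5 first)
+  // Note that the integer and float argument registers advance in lockstep, so
+  // an argument placed in A<n> also consumes F<12+n> and vice versa.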
+
+
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots.  Passed an array
+  // of ideal registers called "sig" and a "length" count.  Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE.  Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+
+	// SEE CallRuntimeNode::calling_convention for more information. by yjl 3/16/2006
+  c_calling_convention %{           
+    uint    stack = 0;          // Starting stack position for args on stack
+    int ireg=A0_num, freg = F12_num;
+
+    // Now pick where all else goes.
+    for( uint i = 0; i < length; i++) {
+      // From the type and the argument number (count) compute the location
+      switch( sig[i].ideal_reg() ) {
+      case Op_RegI:
+      case Op_RegP:
+        if( stack<4 )  {
+          sig[i].set1(ireg++); stack++; freg++;
+        } else {
+          sig[i].set1(SharedInfo::stack2reg(stack++));
+        }
+        break;
+      case Op_RegF:
+        if( stack<4 ) {
+          sig[i].set1(freg++); stack++; ireg++;
+        } else {
+          sig[i].set1(SharedInfo::stack2reg(stack++));
+        }
+        break;
+      case Op_RegL:
+        // align first
+        if ( stack%2 ) {
+          stack++; ireg++; freg++;
+        }
+        if ( stack<4 ) {
+          sig[i].set2(ireg); ireg+=2; freg+=2; stack+=2;
+        } else {
+          sig[i].set2(SharedInfo::stack2reg(stack)); stack+=2;
+        }
+        break;
+      case Op_RegD:
+        // align first
+        if ( stack%2 ) {
+          stack++; ireg++; freg++;
+        }
+        if ( stack<4 ) {
+          sig[i].set2(freg); ireg+=2; freg+=2; stack+=2;
+        } else {
+          sig[i].set2(SharedInfo::stack2reg(stack)); stack+=2;
+        }
+        break;
+      case 0: sig[i].set_bad(); break;
+      default:
+        ShouldNotReachHere();
+        break;
+      }
+    }
+  %}
+
+  // Location of C & interpreter return values
+	// register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. 
+	// SEE Matcher::match. by yjl 3/16/2006
+  c_return_value %{
+    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
+    static int lo[Op_RegL+1] = { 0, 0, V0_num,      V0_num,      F0_num,    F0_num, V0_num };
+    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, F1_num, V1_num };
+    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
+  %}
+
+  // Location of return values
+	// register(s) contain(s) return value for Op_StartC2I and Op_Start. 
+	// SEE Matcher::match. by yjl 3/16/2006
+  return_value %{
+    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
+    static int lo[Op_RegL+1] = { 0, 0, V0_num,      V0_num,      F0_num,    F0_num, V0_num };
+    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, F1_num, V1_num };
+    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
+  %}
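+
+  // Reading the lo[]/hi[] tables above (indexed Op_RegI .. Op_RegL): an int or
+  // pointer result is returned in V0, a long in the V1:V0 pair (hi:lo), a
+  // float in F0, and a double in the F1:F0 pair.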
+
+%}
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(0);        // Required cost attribute
+
+//----------Instruction Attributes---------------------------------------------
+ins_attrib ins_cost(100);       // Required cost attribute
+ins_attrib ins_size(32);         // Required size attribute (in bits)
+ins_attrib ins_pc_relative(0);  // Required PC Relative flag
+ins_attrib ins_short_branch(0); // Required flag: is this instruction a
+                                // non-matching short branch variant of some
+                                // long branch?
+ins_attrib ins_alignment(4);    // Required alignment attribute (must be a power of 2)
+                                // specifies the alignment that some part of the instruction (not
+                                // necessarily the start) requires.  If > 1, a compute_padding()
+                                // function must be provided for the instruction
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+// Immediate Operands
+// Integer Immediate
+operand immI() %{
+  match(ConI);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Constant for test vs zero
+operand immI0() %{
+  predicate( n->get_int() == 0 );
+  match(ConI);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Constant for test vs not-zero
+operand immInz() %{
+  predicate( n->get_int() != 0 );
+  match(ConI);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Constant for test vs not-minus-1
+operand immInone() %{
+  predicate( n->get_int() != -1 );
+  match(ConI);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Constant for increment
+operand immI1() %{
+  predicate( n->get_int() == 1 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Constant for decrement
+operand immI_M1() %{
+  predicate( n->get_int() == -1 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Valid scale values for addressing modes
+operand immI2() %{
+  predicate(0 <= n->get_int() && (n->get_int() <= 3));
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI8() %{
+  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
+  match(ConI);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI16() %{
+  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
+  match(ConI);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Constant for long shifts
+operand immI_32() %{
+  predicate( n->get_int() == 32 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer Immediate: the value 10
+operand immI10() %{
+  predicate(n->get_int() == 10);
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_1_31() %{
+  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_32_63() %{
+  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate
+operand immP() %{
+  match(ConP);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// NULL Pointer Immediate
+operand immP0() %{
+  predicate( n->get_ptr() == 0 );
+  match(ConP);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate
+operand immL() %{
+  match(ConL);
+
+  op_cost(20);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate zero
+operand immL0() %{
+  predicate( n->get_long() == 0L );
+  match(ConL);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long immediate from 0 to 127.
+// Used for a shorter form of long mul by 10.
+operand immL_127() %{
+  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
+  match(ConL);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask
+operand immL_32bits() %{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(20);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: 32-bit signed value (fits in an int)
+operand immL32() %{
+  predicate(n->get_long() == (int)(n->get_long()));
+  match(ConL);
+  op_cost(20);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//Double Immediate zero
+operand immD0() %{
+  // Do additional (and counter-intuitive) test against NaN to work around VC++
+  // bug that generates code such that NaNs compare equal to 0.0
+  predicate( n->getd() == 0.0 && !g_isnan(n->getd()) );
+  match(ConD);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate 
+operand immD1() %{
+  predicate( n->getd() == 1.0 );
+  match(ConD);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate
+operand immD() %{
+  match(ConD);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immXD() %{
+  predicate(UseSSE == 2);
+  match(ConD);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate zero
+operand immXD0() %{
+  // Do additional (and counter-intuitive) test against NaN to work around VC++
+  // bug that generates code such that NaNs compare equal to 0.0 AND do not
+  // compare equal to -0.0.
+  predicate( UseSSE==2 && jlong_cast(n->getd()) == 0 );
+  match(ConD);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate zero
+operand immF0() %{
+  predicate( n->getf() == 0.0 );
+  match(ConF);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
+operand immF() %{
+  match(ConF);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
+operand immXF() %{
+  predicate(UseSSE >= 1);
+  match(ConF);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate zero.  Zero and not -0.0
+operand immXF0() %{
+  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
+  match(ConF);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Immediates for special shifts (sign extend)
+
+// Constants for increment
+operand immI_16() %{
+  predicate( n->get_int() == 16 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immI_24() %{
+  predicate( n->get_int() == 24 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Constant for byte-wide masking
+operand immI_255() %{
+  predicate( n->get_int() == 255 );
+  match(ConI);
+
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Register Operands
+// Integer Register
+operand eRegI() %{
+  constraint(ALLOC_IN_RC(e_reg));
+  match(RegI);
+  match(xRegI);
+  match(eAXRegI);
+  match(eBXRegI);
+  match(eCXRegI);
+  match(eDXRegI);
+  match(eDIRegI);
+  match(eSIRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Subset of Integer Register
+operand xRegI(eRegI reg) %{
+  constraint(ALLOC_IN_RC(x_reg));
+  match(reg);
+  match(eAXRegI);
+  match(eBXRegI);
+  match(eCXRegI);
+  match(eDXRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+operand eAXRegI(xRegI reg) %{
+  constraint(ALLOC_IN_RC(eax_reg));
+  match(reg);
+  match(eRegI);
+
+  format %{ "EAX" %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+operand eBXRegI(xRegI reg) %{
+  constraint(ALLOC_IN_RC(ebx_reg));
+  match(reg);
+  match(eRegI);
+
+  format %{ "EBX" %}
+  interface(REG_INTER);
+%}
+
+operand eCXRegI(xRegI reg) %{
+  constraint(ALLOC_IN_RC(ecx_reg));
+  match(reg);
+  match(eRegI);
+
+  format %{ "ECX" %}
+  interface(REG_INTER);
+%}
+
+operand eDXRegI(xRegI reg) %{
+  constraint(ALLOC_IN_RC(edx_reg));
+  match(reg);
+  match(eRegI);
+
+  format %{ "EDX" %}
+  interface(REG_INTER);
+%}
+
+operand eDIRegI(xRegI reg) %{
+  constraint(ALLOC_IN_RC(edi_reg));
+  match(reg);
+  match(eRegI);
+
+  format %{ "EDI" %}
+  interface(REG_INTER);
+%}
+
+operand naxRegI() %{
+  constraint(ALLOC_IN_RC(nax_reg));
+  match(RegI);
+  match(eCXRegI);
+  match(eDXRegI);
+  match(eSIRegI);
+  match(eDIRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand nadxRegI() %{
+  constraint(ALLOC_IN_RC(nadx_reg));
+  match(RegI);
+  match(eBXRegI);
+  match(eCXRegI);
+  match(eSIRegI);
+  match(eDIRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand ncxRegI() %{
+  constraint(ALLOC_IN_RC(ncx_reg));
+  match(RegI);
+  match(eAXRegI);
+  match(eDXRegI);
+  match(eSIRegI);
+  match(eDIRegI);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
+// // 
+operand eSIRegI(xRegI reg) %{
+   constraint(ALLOC_IN_RC(esi_reg));
+   match(reg);
+   match(eRegI);
+ 
+   format %{ "ESI" %}
+   interface(REG_INTER);
+%}
+
+// Pointer Register
+operand anyRegP() %{
+  constraint(ALLOC_IN_RC(any_reg));
+  match(RegP);
+  match(eAXRegP);
+  match(eBXRegP);
+  match(eCXRegP);
+  match(eDIRegP);
+  match(eRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand eRegP() %{
+  constraint(ALLOC_IN_RC(e_reg));
+  match(RegP);
+  match(eAXRegP);
+  match(eBXRegP);
+  match(eCXRegP);
+  match(eDIRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// On windows95, EBP is not safe to use for implicit null tests.
+operand eRegP_win95_safe() %{
+  constraint(ALLOC_IN_RC(e_reg_win95_safe));
+  match(RegP);
+  match(eAXRegP);
+  match(eBXRegP);
+  match(eCXRegP);
+  match(eDIRegP);
+
+  op_cost(100);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand naxRegP() %{
+  constraint(ALLOC_IN_RC(nax_reg));
+  match(RegP);
+  match(eBXRegP);
+  match(eDXRegP);
+  match(eCXRegP);
+  match(eSIRegP);
+  match(eDIRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand nabxRegP() %{
+  constraint(ALLOC_IN_RC(nabx_reg));
+  match(RegP);
+  match(eCXRegP);
+  match(eDXRegP);
+  match(eSIRegP);
+  match(eDIRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand pRegP() %{
+  constraint(ALLOC_IN_RC(p_reg));
+  match(RegP);
+  match(eBXRegP);
+  match(eDXRegP);
+  match(eSIRegP);
+  match(eDIRegP);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+// Return a pointer value
+operand eAXRegP(eRegP reg) %{
+  constraint(ALLOC_IN_RC(eax_reg));
+  match(reg);
+  format %{ "EAX" %}
+  interface(REG_INTER);
+%}
+
+// Used in AtomicAdd
+operand eBXRegP(eRegP reg) %{
+  constraint(ALLOC_IN_RC(ebx_reg));
+  match(reg);
+  format %{ "EBX" %}
+  interface(REG_INTER);
+%}
+
+// Tail-call (interprocedural jump) to interpreter
+operand eCXRegP(eRegP reg) %{
+  constraint(ALLOC_IN_RC(ecx_reg));
+  match(reg);
+  format %{ "ECX" %}
+  interface(REG_INTER);
+%}
+
+operand eSIRegP(eRegP reg) %{
+  constraint(ALLOC_IN_RC(esi_reg));
+  match(reg);
+  format %{ "ESI" %}
+  interface(REG_INTER);
+%}
+
+// Used in rep stosw
+operand eDIRegP(eRegP reg) %{
+  constraint(ALLOC_IN_RC(edi_reg));
+  match(reg);
+  format %{ "EDI" %}
+  interface(REG_INTER);
+%}
+
+operand eBPRegP() %{
+  constraint(ALLOC_IN_RC(ebp_reg));
+  match(RegP);
+  format %{ "EBP" %}
+  interface(REG_INTER);
+%}
+
+operand eRegL() %{
+  constraint(ALLOC_IN_RC(long_reg));
+  match(RegL);
+  match(eADXRegL);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand eADXRegL( eRegL reg ) %{
+  constraint(ALLOC_IN_RC(eadx_reg));
+  match(reg);
+
+  format %{ "EDX:EAX" %}
+  interface(REG_INTER);
+%}
+
+operand eBCXRegL( eRegL reg ) %{
+  constraint(ALLOC_IN_RC(ebcx_reg));
+  match(reg);
+
+  format %{ "EBX:ECX" %}
+  interface(REG_INTER);
+%}
+
+// Special case for integer high multiply
+operand eADXRegL_low_only() %{
+  constraint(ALLOC_IN_RC(eadx_reg));
+  match(RegL);
+
+  format %{ "EAX" %}
+  interface(REG_INTER);
+%}
+
+// Flags register, used as output of compare instructions
+operand eFlagsReg() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "EFLAGS" %}
+  interface(REG_INTER);
+%}
+
+// Flags register, used as output of FLOATING POINT compare instructions
+operand eFlagsRegU() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+
+  format %{ "EFLAGS_U" %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Register used by long compare
+operand flagsReg_long_LTGE() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+  format %{ "FLAGS_LTGE" %}
+  interface(REG_INTER);
+%}
+operand flagsReg_long_EQNE() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+  format %{ "FLAGS_EQNE" %}
+  interface(REG_INTER);
+%}
+operand flagsReg_long_LEGT() %{
+  constraint(ALLOC_IN_RC(int_flags));
+  match(RegFlags);
+  format %{ "FLAGS_LEGT" %}
+  interface(REG_INTER);
+%}
+
+// Float register operands
+operand regD() %{
+  constraint(ALLOC_IN_RC(dbl_reg));
+  match(RegD);
+  match(regDPR1);
+  match(regDPR2);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand regDPR1(regD reg) %{
+  constraint(ALLOC_IN_RC(dbl_reg0));
+  match(reg);
+  format %{ "FPR1" %}
+  interface(REG_INTER);
+%}
+
+operand regDPR2(regD reg) %{
+  constraint(ALLOC_IN_RC(dbl_reg1));
+  match(reg);
+  format %{ "FPR2" %}
+  interface(REG_INTER);
+%}
+
+// XMM Double register operands
+operand regXD() %{
+  predicate( UseSSE==2 );
+  constraint(ALLOC_IN_RC(xdb_reg));
+  match(RegD);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float register operands
+operand regF() %{
+  constraint(ALLOC_IN_RC(flt_reg));
+  match(RegF);
+  match(regFPR1);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float register operands
+operand regFPR1(regF reg) %{
+  constraint(ALLOC_IN_RC(flt_reg0));
+  match(reg);
+  format %{ "FPR1" %}
+  interface(REG_INTER);
+%}
+
+// XMM register operands
+operand regX() %{
+  predicate( UseSSE>=1 );
+  constraint(ALLOC_IN_RC(xmm_reg));
+  match(RegF);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
+//----------Memory Operands----------------------------------------------------
+// Direct Memory Operand
+operand direct(immP addr) %{
+  match(addr);
+
+  format %{ "[$addr]" %}
+  interface(MEMORY_INTER) %{
+    base(0xFFFFFFFF);
+    index(0x4);
+    scale(0x0);
+    disp($addr);
+  %}
+%}
+
+// Indirect Memory Operand
+operand indirect(eRegP reg) %{
+  constraint(ALLOC_IN_RC(e_reg));
+  match(reg);
+
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Plus Short Offset Operand
+operand indOffset8(eRegP reg, immI8 off) %{
+  match(AddP reg off);
+
+  format %{ "[$reg + $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Long Offset Operand
+operand indOffset32(eRegP reg, immI off) %{
+  match(AddP reg off);
+
+  format %{ "[$reg + $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Long Offset Operand
+operand indOffset32X(eRegI reg, immP off) %{
+  match(AddP off reg);
+
+  format %{ "[$reg + $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
+  match(AddP (AddP reg ireg) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $ireg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndex(eRegP reg, eRegI ireg) %{
+  match(AddP reg ireg);
+
+  op_cost(10);
+  format %{"[$reg + $ireg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// // -------------------------------------------------------------------------
+// // 486 architecture doesn't support "scale * index + offset" without a base
+// // -------------------------------------------------------------------------
+// // Scaled Memory Operands
+// // Indirect Memory Times Scale Plus Offset Operand
+// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
+//   match(AddP off (LShiftI ireg scale));
+// 
+//   op_cost(10);
+//   format %{"[$off + $ireg << $scale]" %}
+//   interface(MEMORY_INTER) %{
+//     base(0x4);
+//     index($ireg);
+//     scale($scale);
+//     disp($off);
+//   %}
+// %}
+
+// Indirect Memory Times Scale Plus Index Register 
+operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
+  match(AddP reg (LShiftI ireg scale));
+
+  op_cost(10);
+  format %{"[$reg + $ireg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
+operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
+  match(AddP (AddP reg (LShiftI ireg scale)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $ireg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+//----------Load Long Memory Operands------------------------------------------
+// The load-long idiom will use its address expression again after loading
+// the first word of the long.  If the load-long destination overlaps with
+// registers used in the addressing expression, the 2nd half will be loaded
+// from a clobbered address.  Fix this by requiring that load-long use
+// address registers that do not overlap with the load-long target.
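+//
+// For example (keeping the x86 notation of the port this file was copied
+// from), if the base register were allowed to overlap the destination,
+//     MOV EAX,[EAX]       // loads the low word and clobbers the base
+//     MOV EDX,[EAX+4]     // now reads the high word from the wrong address
+// the second load would use a clobbered address.  Forcing the address into a
+// register class that cannot overlap the destination avoids this.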
+
+// load-long support
+operand load_long_RegP() %{
+  constraint(ALLOC_IN_RC(esi_reg));
+  match(RegP);
+  match(eSIRegP);
+  op_cost(100);
+  format %{  %}
+  interface(REG_INTER);
+%}
+
+// Indirect Memory Operand Long
+operand load_long_indirect(load_long_RegP reg) %{
+  constraint(ALLOC_IN_RC(esi_reg));
+  match(reg);
+
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Plus Long Offset Operand
+operand load_long_indOffset32(load_long_RegP reg, immI off) %{
+  match(AddP reg off);
+
+  format %{ "[$reg + $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+opclass load_long_memory(load_long_indirect, load_long_indOffset32);
+
+
+//----------Special Memory Operands--------------------------------------------
+// Stack Slot Operand - This operand is used for loading and storing temporary
+//                      values on the stack where a match requires a value to
+//                      flow through memory.
+operand stackSlotP(sRegP reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x4);   // ESP
+    index(0x4);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotI(sRegI reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x4);   // ESP
+    index(0x4);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotF(sRegF reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x4);   // ESP
+    index(0x4);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotD(sRegD reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x4);   // ESP
+    index(0x4);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotL(sRegL reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x4);   // ESP
+    index(0x4);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+//----------Memory Operands - Win95 Implicit Null Variants----------------
+// Indirect Memory Operand
+operand indirect_win95_safe(eRegP_win95_safe reg)
+%{
+  constraint(ALLOC_IN_RC(e_reg));
+  match(reg);
+
+  op_cost(100);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Plus Short Offset Operand
+operand indOffset8_win95_safe(eRegP_win95_safe reg, immI8 off)
+%{
+  match(AddP reg off);
+
+  op_cost(100);
+  format %{ "[$reg + $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Long Offset Operand
+operand indOffset32_win95_safe(eRegP_win95_safe reg, immI off)
+%{
+  match(AddP reg off);
+
+  op_cost(100);
+  format %{ "[$reg + $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexOffset_win95_safe(eRegP_win95_safe reg, eRegI ireg, immI off)
+%{
+  match(AddP (AddP reg ireg) off);
+
+  op_cost(100);
+  format %{"[$reg + $off + $ireg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register 
+operand indIndexScale_win95_safe(eRegP_win95_safe reg, eRegI ireg, immI2 scale)
+%{
+  match(AddP reg (LShiftI ireg scale));
+
+  op_cost(100);
+  format %{"[$reg + $ireg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
+operand indIndexScaleOffset_win95_safe(eRegP_win95_safe reg, immI off, eRegI ireg, immI2 scale)
+%{
+  match(AddP (AddP reg (LShiftI ireg scale)) off);
+
+  op_cost(100);
+  format %{"[$reg + $off + $ireg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op  - This is the operation of the comparison, and is limited to
+//                  the following set of codes:
+//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
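+//
+// For example, the Bool condition "less" encodes as 0xC in cmpOp below (signed
+// compare) but as 0x2 in cmpOpU (unsigned compare); the instruction that
+// matches both the Bool and the flags operand picks whichever subtype matches
+// the flags produced by the compare.  (These encodings appear to be carried
+// over from the x86 condition codes and have not yet been replaced with MIPS
+// encodings in this preliminary port.)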
+
+// Comparison Code
+operand cmpOp() %{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x4);
+    not_equal(0x5);
+    less(0xC);
+    greater_equal(0xD);
+    less_equal(0xE);
+    greater(0xF);
+  %}
+%}
+
+// Comparison Code, unsigned compare.  Used by FP also, with
+// C2 (unordered) turned into GT or LT already.  The other bits
+// C0 and C3 are turned into Carry & Zero flags.
+operand cmpOpU() %{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x4);
+    not_equal(0x5);
+    less(0x2);
+    greater_equal(0x3);
+    less_equal(0x6);
+    greater(0x7);
+  %}
+%}
+
+// Comparison Code for FP conditional move
+operand cmpOp_fcmov() %{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal        (0x0C8);
+    not_equal    (0x1C8);
+    less         (0x0C0);
+    greater_equal(0x1C0);
+    less_equal   (0x0D0);
+    greater      (0x1D0);
+  %}
+%}
+
+// Comparison Code used in long compares
+operand cmpOp_commute() %{
+  match(Bool);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x4);
+    not_equal(0x5);
+    less(0xF);
+    greater_equal(0xE);
+    less_equal(0xD);
+    greater(0xC);
+  %}
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify separate
+// instructions for every form of operand when the instruction accepts
+// multiple operand types with the same basic encoding and format.  The classic
+// case of this is memory operands.
+
+opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
+               indIndex, indIndexScale, indIndexScaleOffset);
+
+// Long memory operations are encoded in 2 instructions and a +4 offset.  
+// This means some kind of offset is always required and you cannot use
+// an oop as the offset (done when working on static globals).
+opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
+                    indIndex, indIndexScale, indIndexScaleOffset);
+
+
+//----------PIPELINE-----------------------------------------------------------
+// Rules which define the behavior of the target architectures pipeline.
+pipeline %{
+
+	//----------ATTRIBUTES---------------------------------------------------------
+	attributes %{
+		fixed_size_instructions;        		// Fixed size instructions
+		branch_has_delay_slot;              // branches have a delay slot on gs2
+		max_instructions_per_bundle = 4;    // Up to 4 instructions per bundle
+		instruction_unit_size = 4;         	// An instruction is 4 bytes long
+		instruction_fetch_unit_size = 32;  	// The processor fetches one line
+		instruction_fetch_units = 1;       	// of 32 bytes
+
+		// List of nop instructions
+		nops( MachNop );
+	%}
+
+	//----------RESOURCES----------------------------------------------------------
+	// Resources are the functional units available to the machine
+
+	// godson2c pipeline
+	// 4 decoders; a "bundle" is limited to the 4 instructions decoded per cycle.
+	// 1 load/store op per cycle, 1 branch, 2 FPUs,
+	// 2 ALU ops; only ALU0 handles mul/div instructions.
+	resources( D0, D1, D2, D3, DECODE = D0 | D1 | D2 | D3, 
+			MEM, BR, FPU0, FPU1, FPU = FPU0 | FPU1, 
+			ALU0, ALU1, ALU = ALU0 | ALU1 );
+
+	//----------PIPELINE DESCRIPTION-----------------------------------------------
+	// Pipeline Description specifies the stages in the machine's pipeline
+
+	// godson 2c pipeline
+	// The details of the Godson-2C pipeline are not known yet; use a generic description for now.
+	// by yjl 2/21/2006
+	pipe_desc(S0, S1, S2, S3, S4, S5, S6);
+
+	//----------PIPELINE CLASSES---------------------------------------------------
+	// Pipeline Classes describe the stages in which input and output are
+	// referenced by the hardware pipeline.
+
+	// Naming convention: ialu or fpu
+	// Then: _reg
+	// Then: _reg if there is a 2nd register
+	// Then: _long if it's a pair of instructions implementing a long
+	// Then: _fat if it requires the big decoder
+	//   Or: _mem if it requires the big decoder and a memory unit.
+
+	// Integer ALU reg operation
+	pipe_class ialu_reg(eRegI dst) %{
+		single_instruction;
+		dst    : S4(write);
+		dst    : S3(read);
+		DECODE : S0;        // any decoder
+		ALU    : S3;        // any alu
+	%}
+
+	// Long ALU reg operation
+	pipe_class ialu_reg_long(eRegL dst) %{
+		instruction_count(2);
+		dst    : S4(write);
+		dst    : S3(read);
+		DECODE : S0(2);     // any 2 decoders
+		ALU    : S3(2);     // both alus
+	%}
+
+	// Integer ALU reg-reg operation
+	pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
+		single_instruction;
+		dst    : S4(write);
+		src    : S3(read);
+		DECODE : S0;        // any decoder
+		ALU    : S3;        // any alu
+	%}
+
+	// Long ALU reg-reg operation
+	pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
+		instruction_count(2);
+		dst    : S4(write);
+		src    : S3(read);
+		DECODE : S0(2);     // any 2 decoders
+		ALU    : S3(2);     // both alus
+	%}
+
+	// Integer Store to Memory
+	pipe_class ialu_mem_reg(memory mem, eRegI src) %{
+		single_instruction;
+		mem    : S3(read);
+		src    : S5(read);
+		D0     : S0;        // big decoder only
+		ALU    : S4;        // any alu
+		MEM    : S3;
+	%}
+
+	// Long Store to Memory
+	pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
+		instruction_count(2);
+		mem    : S3(read);
+		src    : S5(read);
+		D0     : S0(2);     // big decoder only; twice
+		ALU    : S4(2);     // any 2 alus
+		MEM    : S3(2);     // Both mems
+	%}
+
+	// Integer ALU0 reg-reg operation
+	pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
+		single_instruction;
+		dst    : S4(write);
+		src    : S3(read);
+		D0     : S0;        // Big decoder only
+		ALU0   : S3;        // only alu0
+	%}
+
+	// Integer ALU reg-imm operation
+	pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
+		single_instruction;
+		cr     : S4(write);
+		src1   : S3(read);
+		DECODE : S0;        // any decoder
+		ALU    : S3;        // any alu
+	%}
+
+	// Float reg-reg operation
+	pipe_class fpu_reg(regD dst) %{
+		instruction_count(2);
+		dst    : S3(read);
+		DECODE : S0(2);     // any 2 decoders
+		FPU    : S3;
+	%}
+
+	// Float reg-reg operation
+	pipe_class fpu_reg_reg(regD dst, regD src) %{
+		instruction_count(2);
+		dst    : S4(write);
+		src    : S3(read);
+		DECODE : S0(2);     // any 2 decoders
+		FPU    : S3;
+	%}
+
+	// Float reg-reg operation
+	pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
+		instruction_count(3);
+		dst    : S4(write);
+		src1   : S3(read);
+		src2   : S3(read);
+		DECODE : S0(3);     // any 3 decoders
+		FPU    : S3(2);
+	%}
+
+	// Unconditional branch
+	pipe_class pipe_jmp( label labl ) %{
+		single_instruction;
+		BR   : S3;
+	%}
+
+	// Conditional branch
+	pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
+		single_instruction;
+		cr    : S1(read);
+		BR    : S3;
+	%}
+
+	// The real do-nothing guy
+	pipe_class empty( ) %{
+		instruction_count(0);
+	%}
+
+	// Define the class for the Nop node
+	define %{
+		MachNop = empty;
+	%}
+
+%}
+
+//----------INSTRUCTIONS-------------------------------------------------------
+// 
+// match      -- States which machine-independent subtree may be replaced 
+//               by this instruction.
+// ins_cost   -- The estimated cost of this instruction is used by instruction
+//               selection to identify a minimum cost tree of machine 
+//               instructions that matches a tree of machine-independent 
+//               instructions.
+// format     -- A string providing the disassembly for this instruction.
+//               The value of an instruction's operand may be inserted 
+//               by referring to it with a '$' prefix.
+// opcode     -- Three instruction opcodes may be provided.  These are referred 
+//               to within an encode class as $primary, $secondary, and $tertiary
+//               respectively.  The primary opcode is commonly used to 
+//               indicate the type of machine instruction, while secondary 
+//               and tertiary are often used for prefix options or addressing 
+//               modes.
+// ins_encode -- A list of encode classes with parameters. The encode class
+//               name must have been defined in an 'enc_class' specification
+//               in the encode section of the architecture description.
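+//
+// As a concrete reading of the conventions above: in loadB below,
+// opcode(0xBE, 0x0F) sets $primary to 0xBE and $secondary to 0x0F, and
+// ins_encode( OpcS, OpcP, RegMem(dst,mem) ) is meant to emit the secondary
+// byte, then the primary byte, then the ModRM byte produced by RegMem,
+// i.e. the x86 sequence 0F BE /r (MOVSX r32, r/m8).  Note that OpcS, OpcP and
+// RegMem come from the x86 port this file was copied from and are not defined
+// in the stub encode block of this preliminary MIPS port.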
+
+instruct box_handle( eRegP dst, stackSlotP src) %{
+  match( Set dst (Box src) );
+  ins_cost(110);
+  format %{ "LEA    $dst,$src\t! (box node)" %}
+  opcode(0x8D);
+  ins_encode( OpcP, RegMem(dst,src));
+  ins_pipe( ialu_reg_reg_fat );
+%}
+
+
+//----------Load/Store/Move Instructions---------------------------------------
+//----------Load Instructions--------------------------------------------------
+// Load Byte (8bit signed)
+instruct loadB(xRegI dst, memory mem) %{
+  match(Set dst (LoadB mem));
+
+  ins_cost(125);
+  format %{ "MOVSX8 $dst,$mem" %}
+  opcode(0xBE, 0x0F);
+  ins_encode( OpcS, OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Load Byte (8bit UNsigned)
+instruct loadUB(xRegI dst, memory mem, immI_255 bytemask) %{
+  match(Set dst (AndI (LoadB mem) bytemask));
+
+  ins_cost(125);
+  format %{ "MOVZX8 $dst,$mem" %}
+  opcode(0xB6, 0x0F);
+  ins_encode( OpcS, OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Load Char (16bit unsigned)
+instruct loadC(eRegI dst, memory mem) %{
+  match(Set dst (LoadC mem));
+
+  ins_cost(125);
+  format %{ "MOVZX  $dst,$mem" %}
+  opcode(0xB7, 0x0F);
+  ins_encode( OpcS, OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Load Integer
+instruct loadI(eRegI dst, memory mem) %{
+  match(Set dst (LoadI mem));
+
+  ins_cost(125);
+  format %{ "lw    $dst,$mem" %}
+  //opcode(0x8B);
+  //ins_encode( OpcP, RegMem(dst,mem));
+  //ins_pipe( ialu_reg_mem );
+%}
+
+// Load Long.  Cannot clobber address while loading, so restrict address 
+// register to ESI
+instruct loadL(eRegL dst, load_long_memory mem) %{
+  predicate(!Compile::current()->alias_type(n->adr_type())->is_volatile());
+  match(Set dst (LoadL mem));
+
+  ins_cost(250);
+  format %{ "MOV    $dst.lo,$mem\n\t"
+            "MOV    $dst.hi,$mem+4" %}
+  opcode(0x8B, 0x8B);
+  ins_encode( OpcP, RegMem(dst,mem), OpcS, RegMem_Hi(dst,mem));
+  ins_pipe( ialu_reg_long_mem );
+%}
+
+// Volatile Load Long.  Must be atomic, so do 64-bit FILD
+// then store it down to the stack and reload on the int 
+// side.
+instruct loadL_volatile(stackSlotL dst, memory mem) %{
+  predicate(Compile::current()->alias_type(n->adr_type())->is_volatile());
+  match(Set dst (LoadL mem));
+
+  ins_cost(200);
+  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
+            "FISTp  $dst" %}
+  ins_encode(enc_loadL_volatile(mem,dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+// Load Range
+instruct loadRange(eRegI dst, memory mem) %{
+  match(Set dst (LoadRange mem));
+
+  ins_cost(125);
+  format %{ "MOV    $dst,$mem" %}
+  opcode(0x8B);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+
+// Load Pointer
+instruct loadP(eRegP dst, memory mem) %{
+  match(Set dst (LoadP mem));
+
+  ins_cost(125);
+  format %{ "MOV    $dst,$mem" %}
+  opcode(0x8B);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Load Klass Pointer
+instruct loadKlass(eRegP dst, memory mem) %{
+  match(Set dst (LoadKlass mem));
+
+  ins_cost(125);
+  format %{ "MOV    $dst,$mem" %}
+  opcode(0x8B);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Load Short (16bit signed)
+instruct loadS(eRegI dst, memory mem) %{
+  match(Set dst (LoadS mem));
+
+  ins_cost(125);
+  format %{ "MOVSX  $dst,$mem" %}
+  opcode(0xBF, 0x0F);
+  ins_encode( OpcS, OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Load Double
+instruct loadD(regD dst, memory mem) %{
+  predicate(UseSSE<=1);
+  match(Set dst (LoadD mem));
+
+  ins_cost(150);
+  format %{ "FLD_D  ST,$mem\n\t"
+            "FSTP   $dst" %}
+  opcode(0xDD);               /* DD /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_D(dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+// Load Double to XMM
+instruct loadXD(regXD dst, memory mem) %{
+  predicate(UseSSE==2);
+  match(Set dst (LoadD mem));
+  ins_cost(145);
+  format %{ "MOVSD  $dst,$mem" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Load to XMM register (single-precision floating point)
+// MOVSS instruction
+instruct loadX(regX dst, memory mem) %{
+  predicate(UseSSE>=1);
+  match(Set dst (LoadF mem));
+  ins_cost(145);
+  format %{ "MOVSS  $dst,$mem" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Load Float
+instruct loadF(regF dst, memory mem) %{
+  predicate(UseSSE==0);
+  match(Set dst (LoadF mem));
+
+  ins_cost(150);
+  format %{ "FLD_S  ST,$mem\n\t"
+            "FSTP   $dst" %}
+  opcode(0xD9);               /* D9 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+// Load Effective Address
+instruct leaP8(eRegP dst, indOffset8 mem) %{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "LEA    $dst,$mem" %}
+  opcode(0x8D);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_reg_fat );
+%}
+
+instruct leaP32(eRegP dst, indOffset32 mem) %{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "LEA    $dst,$mem" %}
+  opcode(0x8D);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_reg_fat );
+%}
+
+instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "LEA    $dst,$mem" %}
+  opcode(0x8D);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_reg_fat );
+%}
+
+instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "LEA    $dst,$mem" %}
+  opcode(0x8D);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_reg_fat );
+%}
+
+instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "LEA    $dst,$mem" %}
+  opcode(0x8D);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_reg_fat );
+%}
+
+// Load Constant
+instruct loadConI(eRegI dst, immI src) %{
+  match(Set dst src);
+
+  format %{ "MOV    $dst,$src" %}
+  ins_encode( LdImmI(dst, src) );
+  ins_pipe( ialu_reg_fat );
+%}
+
+// Load Constant zero
+instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+ 
+  ins_cost(50);
+  format %{ "XOR    $dst,$dst" %}
+  opcode(0x33);  /* + rd */
+  ins_encode( OpcP, RegReg( dst, dst ) );
+  ins_pipe( ialu_reg );
+%}
+
+instruct loadConP(eRegP dst, immP src) %{
+  match(Set dst src);
+
+  format %{ "MOV    $dst,$src" %}
+  opcode(0xB8);  /* + rd */
+  ins_encode( LdImmP(dst, src) );
+  ins_pipe( ialu_reg_fat );
+%}
+
+instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  ins_cost(200);
+  format %{ "MOV    $dst.lo,$src.lo\n\t"
+            "MOV    $dst.hi,$src.hi" %}
+  opcode(0xB8);
+  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
+  ins_pipe( ialu_reg_long_fat );
+%}
+
+instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+  ins_cost(150);
+  format %{ "XOR    $dst.lo,$dst.lo\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+  opcode(0x33,0x33);
+  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Load return address of the following native call into a register
+instruct loadConPc(eRegP dst, method offset_to_call_return) %{
+  match(Set dst (LoadPC));
+  effect(USE offset_to_call_return);
+  format %{ "MOV    $dst, PC" %}
+  size(5);
+  opcode(0xB8);  /* + rd */
+  ins_encode( LdImmPc(dst, offset_to_call_return) );
+  ins_pipe( ialu_reg_fat );
+%}
+
+instruct loadConF(regF dst, immF src) %{
+  match(Set dst src);
+  ins_cost(125);
+
+  format %{ "FLD_S  ST,$src\n\t"
+            "FSTP   $dst" %}
+  opcode(0xD9, 0x00);       /* D9 /0 */
+  ins_encode(LdImmF(src), Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_con );
+%}
+
+instruct loadConX(regX dst, immXF con) %{
+  match(Set dst con);
+  format %{ "MOVSS  $dst,[$con]" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadConX0(regX dst, immXF0 src) %{
+  match(Set dst src);
+  format %{ "XORPS  $dst,$dst\t# Zero XMM register" %}
+  ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadConD(regD dst, immD src) %{
+  match(Set dst src);
+  ins_cost(125);
+
+  format %{ "FLD_D  ST,$src\n\t"
+            "FSTP   $dst" %}
+  ins_encode(LdImmD(src), Pop_Reg_D(dst) );
+  ins_pipe( fpu_reg_con );
+%}
+
+instruct loadConXD(regXD dst, immXD con) %{
+  match(Set dst con);
+  format %{ "MOVSD  $dst,[$con]" %}
+  ins_encode(Opcode(0xF2), Opcode(0x0F), Opcode(0x10), LdImmXD(dst, con));
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadConXD0(regXD dst, immXD0 src) %{
+  match(Set dst src);
+  format %{ "XORPD  $dst,$dst\t# Zero XMM register" %}
+  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
+  ins_pipe( pipe_slow );
+%}
+
+// Load Stack Slot
+instruct loadSSI(eRegI dst, stackSlotI src) %{
+  match(Set dst src);
+  ins_cost(125);
+
+  format %{ "MOV    $dst,$src" %}
+  opcode(0x8B);
+  ins_encode( OpcP, RegMem(dst,src));
+  ins_pipe( ialu_reg_mem );     
+%}
+
+instruct loadSSL(eRegL dst, stackSlotL src) %{
+  match(Set dst src);
+
+  ins_cost(200);
+  format %{ "MOV    $dst,$src.lo\n\t"
+            "MOV    $dst+4,$src.hi" %}
+  opcode(0x8B, 0x8B);
+  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
+  ins_pipe( ialu_mem_long_reg );
+%}
+
+// Load Stack Slot
+instruct loadSSP(eRegP dst, stackSlotP src) %{
+  match(Set dst src);
+  ins_cost(125);
+
+  format %{ "MOV    $dst,$src" %}
+  opcode(0x8B);
+  ins_encode( OpcP, RegMem(dst,src));
+  ins_pipe( ialu_reg_mem );     
+%}
+
+// Load Stack Slot
+instruct loadSSF(regF dst, stackSlotF src) %{
+  match(Set dst src);
+  ins_cost(125);
+
+  format %{ "FLD_S  $src\n\t"
+            "FSTP   $dst" %}
+  opcode(0xD9);               /* D9 /0, FLD m32real */
+  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_mem );      
+%}
+
+// Load Stack Slot
+instruct loadSSD(regD dst, stackSlotD src) %{
+  match(Set dst src);
+  ins_cost(125);
+
+  format %{ "FLD_D  $src\n\t"
+            "FSTP   $dst" %}
+  opcode(0xDD);               /* DD /0, FLD m64real */
+  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_D(dst) );
+  ins_pipe( fpu_reg_mem );      
+%}
+
+// Prefetch with SSE instruction
+instruct prefetch1( memory mem ) %{
+  predicate (UseSSE>=1);
+  match( Prefetch mem );
+  ins_cost(125);
+
+  format %{ "PREFETCH_L2 $mem\t! Prefetch to level 2 cache" %}
+  opcode( 0x0F, 0x18 );     /* Opcode 0F 18 /3 */
+  ins_encode( OpcP, OpcS, RMopc_Mem(0x03,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Prefetch via a dummy CMP against EAX (no SSE prefetch instruction available).
+// NOT safe against out-of-range requests.
+instruct prefetch0( memory mem, eFlagsReg cr ) %{
+  predicate (UseSSE==0);
+  match( Prefetch mem );
+  effect( KILL cr );
+  ins_cost(100);
+
+  format %{ "CMP    EAX,$mem\t! Prefetch only, no flags" %}
+  opcode( 0x3B );
+  ins_encode( OpcP, RegMem( EAX, mem ) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
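+
+// Note: the CMP above behaves roughly as
+//   CMP EAX,[mem]   ; the memory read pulls the line into the cache,
+//                   ; EAX is only read, EFLAGS is clobbered (hence KILL cr)
+// and, unlike a real PREFETCH, the load can fault, hence the warning about
+// out-of-range requests.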
+
+
+//----------Store Instructions-------------------------------------------------
+// Store Byte
+instruct storeB(memory mem, xRegI src) %{
+  match(Set mem (StoreB mem src));
+
+  ins_cost(125);
+  format %{ "MOV8   $mem,$src" %}
+  opcode(0x88);
+  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_pipe( ialu_mem_reg );     
+%}
+
+// Store Char/Short
+instruct storeC(memory mem, eRegI src) %{
+  match(Set mem (StoreC mem src));
+
+  ins_cost(125);
+  format %{ "MOV16  $mem,$src" %}
+  opcode(0x89, 0x66);
+  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
+  ins_pipe( ialu_mem_reg );     
+%}
+
+// Store Integer
+instruct storeI(memory mem, eRegI src) %{
+  match(Set mem (StoreI mem src));
+
+  ins_cost(125);
+  format %{ "MOV    $mem,$src" %}
+  opcode(0x89);
+  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_pipe( ialu_mem_reg );     
+%}
+
+// Store Long
+instruct storeL(long_memory mem, eRegL src) %{
+  predicate(!Compile::current()->alias_type(n->adr_type())->is_volatile());
+  match(Set mem (StoreL mem src));
+
+  ins_cost(200);
+  format %{ "MOV    $mem,$src.lo\n\t"
+            "MOV    $mem+4,$src.hi" %}
+  opcode(0x89, 0x89);
+  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
+  ins_pipe( ialu_mem_long_reg );
+%}
+
+// Volatile Store Long.  Must be atomic, so move it into
+// the FP TOS and then do a 64-bit FIST.  Has to probe the
+// target address before the store (for null-ptr checks)
+// so the memory operand is used twice in the encoding.
+instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
+  predicate(Compile::current()->alias_type(n->adr_type())->is_volatile());
+  match(Set mem (StoreL mem src));
+  effect( KILL cr );
+  ins_cost(400);
+  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
+            "FILD   $src\n\t"
+            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
+  opcode(0x3B);
+  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
+  ins_pipe( fpu_reg_mem );
+%}
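+
+// Note: the 8-byte x87 FILD/FISTp pair is used because it moves the long in a
+// single memory access; a pair of 32-bit MOVs could be observed half-written
+// by another thread, which the Java memory model forbids for volatile longs.
+// The source is a stackSlotL rather than a register pair because FILD can only
+// load its operand from memory.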
+
+// Store Pointer; for storing unknown oops and raw pointers
+instruct storeP(memory mem, anyRegP src) %{
+  match(Set mem (StoreP mem src));
+
+  ins_cost(125);
+  format %{ "MOV    $mem,$src" %}
+  opcode(0x89);
+  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_pipe( ialu_mem_reg );     
+%}
+
+// Store Integer Immediate
+instruct storeImmI(memory mem, immI src) %{
+  match(Set mem (StoreI mem src));
+
+  ins_cost(150);
+  format %{ "MOV    $mem,$src" %}
+  opcode(0xC7);               /* C7 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
+  ins_pipe( ialu_mem_imm );     
+%}
+
+// Store Short/Char Immediate
+instruct storeImmI16(memory mem, immI16 src) %{
+  match(Set mem (StoreC mem src));
+
+  ins_cost(150);
+  format %{ "MOV16  $mem,$src" %}
+  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
+  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
+  ins_pipe( ialu_mem_imm );     
+%}
+
+// Store Pointer Immediate; null pointers or constant oops that do not
+// need card-mark barriers.
+instruct storeImmP(memory mem, immP src) %{
+  match(Set mem (StoreP mem src));
+
+  ins_cost(150);
+  format %{ "MOV    $mem,$src" %}
+  opcode(0xC7);               /* C7 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
+  ins_pipe( ialu_mem_imm );     
+%}
+
+// Store Byte Immediate
+instruct storeImmB(memory mem, immI8 src) %{
+  match(Set mem (StoreB mem src));
+
+  ins_cost(150);
+  format %{ "MOV8   $mem,$src" %}
+  opcode(0xC6);               /* C6 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
+  ins_pipe( ialu_mem_imm );     
+%}
+
+// Store CMS card-mark Immediate
+instruct storeImmCM(memory mem, immI8 src) %{
+  match(Set mem (StoreCM mem src));
+
+  ins_cost(150);
+  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
+  opcode(0xC6);               /* C6 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
+  ins_pipe( ialu_mem_imm );     
+%}
+
+// Store Double
+instruct storeD( memory mem, regDPR1 src) %{
+  predicate(UseSSE<=1);
+  match(Set mem (StoreD mem src));
+
+  ins_cost(100);
+  format %{ "FST_D  $mem,$src" %}
+  opcode(0xDD);       /* DD /2 */
+  ins_encode( enc_FP_store(mem,src) );
+  ins_pipe( fpu_mem_reg );
+%}
+
+// Store double does rounding on x86
+instruct storeD_rounded( memory mem, regDPR1 src) %{
+  predicate(UseSSE<=1);
+  match(Set mem (StoreD mem (RoundDouble src)));
+
+  ins_cost(100);
+  format %{ "FST_D  $mem,$src\t# round" %}
+  opcode(0xDD);       /* DD /2 */
+  ins_encode( enc_FP_store(mem,src) );
+  ins_pipe( fpu_mem_reg );
+%}
+
+// Store XMM register to memory (double-precision floating point)
+// MOVSD instruction
+instruct storeXD(memory mem, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set mem (StoreD mem src));
+  ins_cost(95);
+  format %{ "MOVSD  $mem,$src" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Store XMM register to memory (single-precision floating point)
+// MOVSS instruction
+instruct storeX(memory mem, regX src) %{
+  predicate(UseSSE>=1);
+  match(Set mem (StoreF mem src));
+  ins_cost(95);
+  format %{ "MOVSS  $mem,$src" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Store Float
+instruct storeF( memory mem, regFPR1 src) %{
+  predicate(UseSSE==0);
+  match(Set mem (StoreF mem src));
+
+  ins_cost(100);
+  format %{ "FST_S  $mem,$src" %}
+  opcode(0xD9);       /* D9 /2 */
+  ins_encode( enc_FP_store(mem,src) );
+  ins_pipe( fpu_mem_reg );
+%}
+
+// Store Float does rounding on x86
+instruct storeF_rounded( memory mem, regFPR1 src) %{
+  match(Set mem (StoreF mem (RoundFloat src)));
+
+  ins_cost(100);
+  format %{ "FST_S  $mem,$src\t# round" %}
+  opcode(0xD9);       /* D9 /2 */
+  ins_encode( enc_FP_store(mem,src) );
+  ins_pipe( fpu_mem_reg );
+%}
+
+// Store Float does rounding on x86
+instruct storeF_Drounded( memory mem, regDPR1 src) %{
+  match(Set mem (StoreF mem (ConvD2F src)));
+
+  ins_cost(100);
+  format %{ "FST_S  $mem,$src\t# D-round" %}
+  opcode(0xD9);       /* D9 /2 */
+  ins_encode( enc_FP_store(mem,src) );
+  ins_pipe( fpu_mem_reg );
+%}
+
+// Store Float
+instruct storeF_imm( memory mem, immF src) %{
+  match(Set mem (StoreF mem src));
+
+  ins_cost(125);
+  format %{ "MOV    $mem,$src\t# store float" %}
+  opcode(0xC7);               /* C7 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Store Integer to stack slot
+instruct storeSSI(stackSlotI dst, eRegI src) %{
+  match(Set dst src);
+
+  ins_cost(100);
+  format %{ "MOV    $dst,$src" %}
+  opcode(0x89);
+  ins_encode( OpcPRegSS( dst, src ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+// Store Integer to stack slot
+instruct storeSSP(stackSlotP dst, eRegP src) %{
+  match(Set dst src);
+
+  ins_cost(100);
+  format %{ "MOV    $dst,$src" %}
+  opcode(0x89);
+  ins_encode( OpcPRegSS( dst, src ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+// Store Long to stack slot
+instruct storeSSL(stackSlotL dst, eRegL src) %{
+  match(Set dst src);
+
+  ins_cost(200);
+  format %{ "MOV    $dst,$src.lo\n\t"
+            "MOV    $dst+4,$src.hi" %}
+  opcode(0x89, 0x89);
+  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
+  ins_pipe( ialu_mem_long_reg );
+%}
+
+//----------MemBar Instructions-----------------------------------------------
+// Memory barrier flavors
+
+instruct membar_acquire() %{
+  match(MemBarAcquire);
+  ins_cost(400);
+
+  size(0);
+  format %{ "MEMBAR-acquire" %}
+  ins_encode( enc_membar_acquire );
+  ins_pipe(pipe_slow);
+%}
+
+instruct membar_acquire_lock() %{
+  match(MemBarAcquire);
+  predicate(Matcher::prior_fast_lock(n));
+  ins_cost(0);
+
+  size(0);
+  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_release() %{
+  match(MemBarRelease);
+  ins_cost(400);
+
+  size(0);
+  format %{ "MEMBAR-release" %}
+  ins_encode( enc_membar_release );
+  ins_pipe(pipe_slow);
+%}
+
+instruct membar_release_lock() %{
+  match(MemBarRelease);
+  predicate(Matcher::post_fast_unlock(n));
+  ins_cost(0);
+
+  size(0);
+  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_volatile() %{
+  match(MemBarVolatile);
+  ins_cost(400);
+
+  format %{ "MEMBAR-volatile" %}
+  ins_encode( enc_membar_volatile );
+  ins_pipe(pipe_slow);
+%}
+
+instruct unnecessary_membar_volatile() %{
+  match(MemBarVolatile);
+  predicate(Matcher::post_store_load_barrier(n));
+  ins_cost(0);
+
+  size(0);
+  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+instruct membar_cpu_order() %{
+  match(MemBarCPUOrder);
+  ins_cost(1);
+
+  format %{ "MEMBAR-CPUOrder" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
+//----------Move Instructions--------------------------------------------------
+instruct castL2P(eAXRegP dst, eADXRegL src) %{
+  match(Set dst (CastL2P src));
+  format %{ "#castL2P of eAX" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe(empty);
+%}
+
+instruct castP2L(eADXRegL dst, eAXRegP src, eFlagsReg cr) %{
+  match(Set dst (CastP2L src));
+  effect(KILL cr);
+  ins_cost(50);
+  format %{ "#castP2L of eAX\n\t"
+            "XOR    EDX,EDX" %}
+  opcode(0x33);  /* XOR EDX,EDX */
+  ins_encode( OpcP, RegReg( EDX, EDX ) );
+  ins_pipe( ialu_reg );
+%}
+
+instruct castP2I(eRegI dst, eRegP src ) %{
+  match(Set dst (CastP2I src));
+  ins_cost(50);
+  format %{ "MOV    $dst,$src\t# Cast ptr to int" %}
+  ins_encode( enc_Copy( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+//----------Conditional Move---------------------------------------------------
+// Conditional move
+instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cop $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+instruct cmovI_regU( eRegI dst, eRegI src, eFlagsRegU cr, cmpOpU cop ) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cop $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+// Conditional move
+instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
+  ins_cost(250);
+  format %{ "CMOV$cop $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
+  ins_pipe( pipe_cmov_mem );
+%}
+
+// Conditional move
+instruct cmovI_memu(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
+  ins_cost(250);
+  format %{ "CMOV$cop $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
+  ins_pipe( pipe_cmov_mem );
+%}
+
+// Conditional move
+instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cop $dst,$src\t# ptr" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+// Conditional move (non-P6 version)
+// Note:  a CMoveP is generated for  stubs and native wrappers
+//        regardless of whether we are on a P6, so we
+//        emulate a cmov here
+instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
+  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
+  ins_cost(300);
+  format %{ "Jn$cop   skip\n\t"
+          "MOV    $dst,$src\t# pointer\n"
+      "skip:" %}
+  opcode(0x8b);
+  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
+  ins_pipe( pipe_cmov_reg );
+%}
+
+// Conditional move
+instruct cmovP_regU(eRegP dst, eRegP src, eFlagsRegU cr, cmpOpU cop ) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cop $dst,$src\t# ptr" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+// DISABLED: Requires the ADLC to emit a bottom_type call that
+// correctly meets the two pointer arguments; one is an incoming
+// register but the other is a memory operand.  ALSO appears to
+// be buggy with implicit null checks.
+//
+//// Conditional move
+//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
+//  predicate(VM_Version::supports_cmov() );
+//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
+//  ins_cost(250);
+//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
+//  opcode(0x0F,0x40);
+//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
+//  ins_pipe( pipe_cmov_mem );
+//%}
+//
+//// Conditional move
+//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
+//  predicate(VM_Version::supports_cmov() );
+//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
+//  ins_cost(250);
+//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
+//  opcode(0x0F,0x40);
+//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
+//  ins_pipe( pipe_cmov_mem );
+//%}
+
+// Conditional move
+instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "FCMOV$cop $dst,$src\t# double" %}
+  opcode(0xDA);
+  ins_encode( enc_cmov_d(cop,src) );
+  ins_pipe( pipe_cmovD_reg );
+%}
+
+// Conditional move
+instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
+  predicate(UseSSE==0);
+  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "FCMOV$cop $dst,$src\t# float" %}
+  opcode(0xDA);
+  ins_encode( enc_cmov_d(cop,src) );
+  ins_pipe( pipe_cmovD_reg );
+%}
+
+// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
+instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "Jn$cop   skip\n\t"
+            "MOV    $dst,$src\t# double\n"
+      "skip:" %}
+  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
+  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
+  ins_pipe( pipe_cmovD_reg );
+%}
+
+// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
+instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
+  predicate(UseSSE==0);
+  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "Jn$cop    skip\n\t"
+            "MOV    $dst,$src\t# float\n"
+      "skip:" %}
+  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
+  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
+  ins_pipe( pipe_cmovD_reg );
+%}
+
+// No CMOVE with SSE/SSE2
+instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
+  predicate (UseSSE>=1);
+  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "Jn$cop   skip\n\t"
+            "MOVSS  $dst,$src\t# float\n"
+      "skip:" %}
+  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
+  ins_encode( enc_cmov_branch( cop, 0x04 ), MovX_reg(dst,src));
+  ins_pipe( pipe_slow );
+%}
+
+// No CMOVE with SSE/SSE2
+instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
+  predicate (UseSSE==2);
+  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "Jn$cop   skip\n\t"
+            "MOVSD  $dst,$src\t# float\n"
+      "skip:" %}
+  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
+  ins_encode( enc_cmov_branch( cop, 0x4 ), MovXD_reg(dst,src));
+  ins_pipe( pipe_slow );
+%}
+
+// unsigned version
+instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
+  predicate (UseSSE>=1);
+  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "Jn$cop   skip\n\t"
+            "MOVSS  $dst,$src\t# float\n"
+      "skip:" %}
+  ins_encode( enc_cmov_branch( cop, 0x4 ), MovX_reg(dst,src) );
+  ins_pipe( pipe_slow );
+%}
+
+// unsigned version
+instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
+  predicate (UseSSE==2);
+  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "Jn$cop   skip\n\t"
+            "MOVSD  $dst,$src\t# float\n"
+      "skip:" %}
+  ins_encode( enc_cmov_branch( cop, 0x4 ), MovXD_reg(dst,src) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
+            "CMOV$cop $dst.hi,$src.hi" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
+  predicate(VM_Version::supports_cmov() );
+  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
+            "CMOV$cop $dst.hi,$src.hi" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+//----------Arithmetic Instructions--------------------------------------------
+//----------Addition Instructions----------------------------------------------
+// Integer Addition Instructions
+instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (AddI dst src));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "ADD    $dst,$src" %}
+  opcode(0x03);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+  match(Set dst (AddI dst src));
+  effect(KILL cr);
+
+  format %{ "ADD    $dst,$src" %}
+  opcode(0x81, 0x00); /* /0 id */
+  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
+  ins_pipe( ialu_reg );
+%}
+
+instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+  match(Set dst (AddI dst src));
+  effect(KILL cr);
+
+  size(1);
+  format %{ "INC    $dst" %}
+  opcode(0x40); /* 40 + rd => INC r32 */
+  ins_encode( Opc_plus( primary, dst ) );
+  ins_pipe( ialu_reg );
+%}
+
+instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
+  match(Set dst (AddI src0 src1));
+  ins_cost(110);
+
+  format %{ "LEA    $dst,[$src0 + $src1]" %}
+  opcode(0x8D); /* 0x8D /r */
+  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
+  match(Set dst (AddP src0 src1));
+  ins_cost(110);
+
+  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
+  opcode(0x8D); /* 0x8D /r */
+  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
+  match(Set dst (AddI dst src));
+  effect(KILL cr);
+
+  size(1);
+  format %{ "DEC    $dst" %}
+  opcode(0x48); /* 48 + rd => DEC r32 */
+  ins_encode( Opc_plus( primary, dst ) );
+  ins_pipe( ialu_reg );
+%}
+
+instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (AddP dst src));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "ADD    $dst,$src" %}
+  opcode(0x03);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
+  match(Set dst (AddP dst src));
+  effect(KILL cr);
+
+  format %{ "ADD    $dst,$src" %}
+  opcode(0x81,0x00); /* Opcode 81 /0 id */
+  // ins_encode( RegImm( dst, src) );
+  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
+  ins_pipe( ialu_reg );
+%}
+
+instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+  match(Set dst (AddI dst (LoadI src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "ADD    $dst,$src" %}
+  opcode(0x03);
+  ins_encode( OpcP, RegMem( dst, src) );
+  ins_pipe( ialu_reg_mem );
+%}
+
+instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(150);
+  format %{ "ADD    $dst,$src" %}
+  opcode(0x01);  /* Opcode 01 /r */
+  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+// Add Memory with Immediate
+instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "ADD    $dst,$src" %}
+  opcode(0x81);               /* Opcode 81 /0 id */
+  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
+  ins_pipe( ialu_mem_imm );
+%}
+
+instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "INC    $dst" %}
+  opcode(0xFF);               /* Opcode FF /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,dst));
+  ins_pipe( ialu_mem_imm );
+%}
+
+instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "DEC    $dst" %}
+  opcode(0xFF);               /* Opcode FF /1 */
+  ins_encode( OpcP, RMopc_Mem(0x01,dst));
+  ins_pipe( ialu_mem_imm );
+%}
+
+
+instruct checkCastPP( eRegP dst ) %{
+  match(Set dst (CheckCastPP dst));
+
+  size(0);
+  format %{ "#checkcastPP of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe( empty );
+%}
+
+instruct castPP( eRegP dst ) %{
+  match(Set dst (CastPP dst));
+  format %{ "#castPP of $dst" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe( empty );
+%}
+
+
+// Load-locked - same as a regular pointer load when used with compare-swap
+instruct loadPLocked(eRegP dst, memory mem) %{
+  match(Set dst (LoadPLocked mem));
+
+  ins_cost(125);
+  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
+  opcode(0x8B);
+  ins_encode( OpcP, RegMem(dst,mem));
+  ins_pipe( ialu_reg_mem );
+%}
+
+// LoadLong-locked - same as a volatile long load when used with compare-swap
+instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
+  match(Set dst (LoadLLocked mem));
+
+  ins_cost(200);
+  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
+            "FISTp  $dst" %}
+  ins_encode(enc_loadL_volatile(mem,dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
+instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
+  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
+  // EAX is killed if there is contention, but then it's also unused.
+  // In the common case of no contention, EAX holds the new oop address.
+  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
+  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
+  ins_pipe( pipe_cmpxchg );
+%}
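+
+// For reference, the locked CMPXCHG above behaves roughly as
+//   if (EAX == [$heap_top_ptr]) { [$heap_top_ptr] = $newval; ZF = 1; }
+//   else                        { EAX = [$heap_top_ptr];     ZF = 0; }
+// so the EQ condition produced in $cr directly reports whether the new heap
+// top was installed.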
+
+// Conditional-store of a long value
+// Returns a boolean value: 1 on success, 0 on failure.  Implemented with a CMPXCHG8 on Intel.
+// mem_ptr can actually be in either ESI or EDI
+instruct storeLConditional( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+  match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
+  // EDX:EAX is killed if there is contention, but then it's also unused.
+  // In the common case of no contention, EDX:EAX holds the new oop address.
+  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
+            "MOV    $res,0\n\t"
+            "JNE,s  fail\n\t"
+            "MOV    $res,1\n"
+          "fail:" %}
+  ins_encode( enc_cmpxchg8(mem_ptr),
+              enc_flags_ne_to_boolean(res) );
+  ins_pipe( pipe_cmpxchg );
+%}
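+
+// For reference, LOCK CMPXCHG8B [mem] compares the 8-byte operand with EDX:EAX
+// and, on a match, stores ECX:EBX and sets ZF; otherwise it loads the current
+// value into EDX:EAX and clears ZF.  That is why oldval/newval are pinned to
+// the eADXRegL/eBCXRegL operand classes here and in the rules below, and why
+// enc_flags_ne_to_boolean only needs to turn ZF into 0/1.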
+
+// Conditional-store of a long value
+// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
+// mem_ptr can actually be in either ESI or EDI
+instruct storeLConditional_flags( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr, immI0 zero ) %{
+  match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
+  // EDX:EAX is killed if there is contention, but then it's also unused.
+  // In the common case of no contention, EDX:EAX holds the new oop address.
+  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
+  ins_encode( enc_cmpxchg8(mem_ptr) );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
+
+instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
+  effect(KILL cr, KILL oldval);
+  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
+            "MOV    $res,0\n\t"
+            "JNE,s  fail\n\t"
+            "MOV    $res,1\n"
+          "fail:" %}
+  ins_encode( enc_cmpxchg8(mem_ptr),
+              enc_flags_ne_to_boolean(res) );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
+  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+  effect(KILL cr, KILL oldval);
+  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
+            "MOV    $res,0\n\t"
+            "JNE,s  fail\n\t"
+            "MOV    $res,1\n"
+          "fail:" %}
+  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
+  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
+  effect(KILL cr, KILL oldval);
+  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
+            "MOV    $res,0\n\t"
+            "JNE,s  fail\n\t"
+            "MOV    $res,1\n"
+          "fail:" %}
+  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+//----------Subtraction Instructions-------------------------------------------
+// Integer Subtraction Instructions
+instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (SubI dst src));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SUB    $dst,$src" %}
+  opcode(0x2B);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+  match(Set dst (SubI dst src));
+  effect(KILL cr);
+
+  format %{ "SUB    $dst,$src" %}
+  opcode(0x81,0x05);  /* Opcode 81 /5 */
+  // ins_encode( RegImm( dst, src) );
+  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
+  ins_pipe( ialu_reg );
+%}
+
+instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+  match(Set dst (SubI dst (LoadI src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "SUB    $dst,$src" %}
+  opcode(0x2B);
+  ins_encode( OpcP, RegMem( dst, src) );
+  ins_pipe( ialu_reg_mem );
+%}
+
+instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(150);
+  format %{ "SUB    $dst,$src" %}
+  opcode(0x29);  /* Opcode 29 /r */
+  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+// Subtract from a pointer
+instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
+  match(Set dst (AddP dst (SubI zero src)));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SUB    $dst,$src" %}
+  opcode(0x2B);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
+  match(Set dst (SubI zero dst));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "NEG    $dst" %}
+  opcode(0xF7,0x03);  // Opcode F7 /3
+  ins_encode( OpcP, RegOpc( dst ) );
+  ins_pipe( ialu_reg );
+%}
+
+
+//----------Multiplication/Division Instructions-------------------------------
+// Integer Multiplication Instructions
+// Multiply Register
+instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (MulI dst src));
+  effect(KILL cr);
+
+  size(3);
+  ins_cost(300);
+  format %{ "IMUL   $dst,$src" %}
+  opcode(0xAF, 0x0F);
+  ins_encode( OpcS, OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg_alu0 );
+%}
+
+// Multiply 32-bit Immediate
+instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
+  match(Set dst (MulI src imm));
+  effect(KILL cr);
+
+  ins_cost(300);
+  format %{ "IMUL   $dst,$src,$imm" %}
+  opcode(0x69);  /* 69 /r id */
+  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
+  ins_pipe( ialu_reg_reg_alu0 );
+%}
+
+instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
+  match(Set dst src);
+  effect(KILL cr);
+
+  // Note that this is artificially increased to make it more expensive than loadConL
+  ins_cost(250);
+  format %{ "MOV    EAX,$src\t// low word only" %}
+  opcode(0xB8);
+  ins_encode( LdImmL_Lo(dst, src) );
+  ins_pipe( ialu_reg_fat );
+%}
+
+// Multiply by 32-bit Immediate, taking the shifted high order results
+//  (special case for shift by 32)
+instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
+  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
+  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
+             _kids[0]->_kids[0]->_kids[1]->_leaf->is_Type()->type()->is_long()->get_con() >= min_jint &&
+             _kids[0]->_kids[0]->_kids[1]->_leaf->is_Type()->type()->is_long()->get_con() <= max_jint );
+  effect(USE_KILL src1, KILL cr);
+
+  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
+  ins_cost(0*100 + 1*400 - 150);
+  format %{ "IMUL   EDX:EAX,$src1" %}
+  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
+  ins_pipe( pipe_slow );
+%}
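+
+// Note: this matches the shape (int)(((long)src1 * con) >> 32).  The
+// one-operand IMUL leaves the full signed 64-bit product in EDX:EAX (the
+// constant was placed in EAX by loadConL_low_only), so for a shift of exactly
+// 32 the answer is simply EDX; the rule below covers the 32..63 range with an
+// extra SAR of EDX by $cnt-32.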
+
+// Multiply by 32-bit Immediate, taking the shifted high order results
+instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
+  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
+  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
+             _kids[0]->_kids[0]->_kids[1]->_leaf->is_Type()->type()->is_long()->get_con() >= min_jint &&
+             _kids[0]->_kids[0]->_kids[1]->_leaf->is_Type()->type()->is_long()->get_con() <= max_jint );
+  effect(USE_KILL src1, KILL cr);
+
+  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
+  ins_cost(1*100 + 1*400 - 150);
+  format %{ "IMUL   EDX:EAX,$src1\n\t"
+            "SAR    EDX,$cnt-32" %}
+  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
+  ins_pipe( pipe_slow );
+%}
+
+// Multiply Memory 32-bit Immediate
+instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
+  match(Set dst (MulI (LoadI src) imm));
+  effect(KILL cr);
+
+  ins_cost(300);
+  format %{ "IMUL   $dst,$src,$imm" %}
+  opcode(0x69);  /* 69 /r id */
+  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
+  ins_pipe( ialu_reg_mem_alu0 );
+%}
+
+// Multiply Memory
+instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
+  match(Set dst (MulI dst (LoadI src)));
+  effect(KILL cr);
+
+  ins_cost(350);
+  format %{ "IMUL   $dst,$src" %}
+  opcode(0xAF, 0x0F);
+  ins_encode( OpcS, OpcP, RegMem( dst, src) );
+  ins_pipe( ialu_reg_mem_alu0 );
+%}
+
+// Multiply Register Int to Long
+instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
+  // Basic Idea: long = (long)int * (long)int
+  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
+  effect(DEF dst, USE src, USE src1, KILL flags);
+
+  ins_cost(300);
+  format %{ "IMUL   $dst,$src1" %}
+
+  ins_encode( long_int_multiply( dst, src1 ) );
+  ins_pipe( ialu_reg_reg_alu0 );
+%}
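+
+// Note: the IMUL here is the one-operand form (EDX:EAX = EAX * r/m32), which
+// yields the full signed 64-bit product directly, so no explicit sign
+// extension of the 32-bit inputs is needed; the eAXRegI/eADXRegL operand
+// classes simply pin the values into those registers.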
+
+instruct mulIS_eReg(eADXRegL dst, eBCXRegL mask, eRegL mask1, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
+  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
+  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask1)));
+  predicate(_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
+            _kids[0]->_kids[1]->_leaf->is_Type()->type()->is_long()->get_con() == 0xFFFFFFFFl &&
+            _kids[1]->_kids[1]->_leaf->Opcode() == Op_ConL &&
+            _kids[1]->_kids[1]->_leaf->is_Type()->type()->is_long()->get_con() == 0xFFFFFFFFl );
+  effect(DEF dst, USE src, USE src1, USE mask, USE mask1, KILL flags);
+
+  ins_cost(300);
+  format %{ "MUL    $dst,$src1" %}  
+
+  ins_encode( long_uint_multiply(dst, src1) );
+  ins_pipe( ialu_reg_reg_alu0 );
+%}
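+
+// Note: the AndL-with-0xFFFFFFFF on both ConvI2L inputs is the ideal-graph
+// shape of an unsigned int-to-long widening, so the one-operand (unsigned) MUL
+// produces the exact 64-bit product of the two 32-bit values in EDX:EAX.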
+
+// Multiply Register Long
+instruct mulL_eReg(eADXRegL dst, eRegL src, eFlagsReg cr, eSIRegI esi) %{
+  match(Set dst (MulL dst src));
+  effect(KILL cr, KILL esi);
+  ins_cost(4*100+3*400);
+// Basic idea: lo(result) = lo(x_lo * y_lo)
+//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
+  format %{ "MOV    ESI,$src.lo\n\t"
+            "IMUL   ESI,EDX\n\t"
+            "MOV    EDX,$src.hi\n\t"
+            "IMUL   EDX,EAX\n\t"
+            "ADD    ESI,EDX\n\t"
+            "MUL    EDX:EAX,$src.lo\n\t"
+            "ADD    EDX,ESI" %}
+  ins_encode( long_multiply( dst, src, esi ) );
+  ins_pipe( pipe_slow );
+%}
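+
+// For reference, the lo/hi formula above follows from keeping only 64 bits of
+// the product:
+//   (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)  mod 2^64
+//       = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 32)
+// the x_hi*y_hi term only reaches bits >= 64 and is dropped.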
+
+// Multiply Register Long by small constant
+instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eFlagsReg cr, eSIRegI esi) %{
+  match(Set dst (MulL dst src));
+  effect(KILL cr, KILL esi);
+  ins_cost(2*100+2*400);
+  size(12);
+// Basic idea: lo(result) = lo(src * EAX)
+//             hi(result) = hi(src * EAX) + lo(src * EDX)
+  format %{ "IMUL   ESI,EDX,$src\n\t"
+            "MOV    EDX,$src\n\t"
+            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
+            "ADD    EDX,ESI" %}
+  ins_encode( long_multiply_con( dst, src, esi ) );
+  ins_pipe( pipe_slow );
+%}
+
+// Integer DIV with Register
+instruct divI_eReg(eAXRegI eax, eDXRegI edx, eCXRegI div, eFlagsReg cr) %{
+  match(Set eax (DivI eax div));
+  effect(KILL edx, KILL cr);
+  size(26);
+  ins_cost(30*100+10*100);
+  format %{ "CMP    EAX,0x80000000\n\t"
+            "JNE,s  normal\n\t"
+            "XOR    EDX,EDX\n\t"
+            "CMP    ECX,-1\n\t"
+            "JE,s   done\n"
+    "normal: CDQ\n\t"
+            "IDIV   $div\n\t"
+    "done:"        %}
+  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
+  ins_encode( cdq_enc, OpcP, RegOpc(div) );
+  ins_pipe( ialu_reg_reg_alu0 );
+%}
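+
+// Note: the CMP/JNE guard above covers the one input pair where IDIV would
+// fault, min_jint / -1 (the quotient 2^31 does not fit in 32 bits, so the CPU
+// raises a divide error).  Java defines that quotient as min_jint with
+// remainder 0, which is exactly what leaving EAX untouched and zeroing EDX
+// produces when IDIV is skipped.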
+
+// Divide Register Long
+instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
+  match(Set dst (DivL src1 src2));
+  effect( KILL cr, KILL cx, KILL bx );
+  ins_cost(10000);
+  format %{ "PUSH   $src1.hi\n\t"
+            "PUSH   $src1.lo\n\t"
+            "PUSH   $src2.hi\n\t"
+            "PUSH   $src2.lo\n\t"
+            "CALL   SharedRuntime::ldiv\n\t"
+            "ADD    ESP,16" %}
+  ins_encode( long_div(src1,src2) );
+  ins_pipe( pipe_slow );
+%}
+
+// Integer MOD with Register
+instruct modI_eReg(eDXRegI edx, eAXRegI eax, eCXRegI div, eFlagsReg cr) %{
+  match(Set edx (ModI eax div));
+  effect(KILL eax, KILL cr);
+
+  size(26);
+  ins_cost(300);
+  format %{ "CDQ\n\t"
+            "IDIV   $div" %}
+  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
+  ins_encode( cdq_enc, OpcP, RegOpc(div) );
+  ins_pipe( ialu_reg_reg_alu0 );
+%}
+
+// Remainder Register Long
+instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
+  match(Set dst (ModL src1 src2));
+  effect( KILL cr, KILL cx, KILL bx );
+  ins_cost(10000);
+  format %{ "PUSH   $src1.hi\n\t"
+            "PUSH   $src1.lo\n\t"
+            "PUSH   $src2.hi\n\t"
+            "PUSH   $src2.lo\n\t"
+            "CALL   SharedRuntime::lrem\n\t"
+            "ADD    ESP,16" %}
+  ins_encode( long_mod(src1,src2) );
+  ins_pipe( pipe_slow );
+%}
+
+// Integer Shift Instructions
+// Shift Left by one
+instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+  match(Set dst (LShiftI dst shift));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SHL    $dst,$shift" %}
+  opcode(0xD1, 0x4);  /* D1 /4 */
+  ins_encode( OpcP, RegOpc( dst ) );
+  ins_pipe( ialu_reg );
+%}
+
+// Shift Left by 8-bit immediate
+instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+  match(Set dst (LShiftI dst shift));
+  effect(KILL cr);
+
+  size(3);
+  format %{ "SHL    $dst,$shift" %}
+  opcode(0xC1, 0x4);  /* C1 /4 ib */
+  ins_encode( RegOpcImm( dst, shift) );
+  ins_pipe( ialu_reg );
+%}
+
+// Shift Left by variable
+instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+  match(Set dst (LShiftI dst shift));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SHL    $dst,$shift" %}
+  opcode(0xD3, 0x4);  /* D3 /4 */
+  ins_encode( OpcP, RegOpc( dst ) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+// Arithmetic shift right by one
+instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+  match(Set dst (RShiftI dst shift));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SAR    $dst,$shift" %}
+  opcode(0xD1, 0x7);  /* D1 /7 */
+  ins_encode( OpcP, RegOpc( dst ) );
+  ins_pipe( ialu_reg );
+%}
+
+// Arithmetic shift right by one
+instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
+  effect(KILL cr);
+  format %{ "SAR    $dst,$shift" %}
+  opcode(0xD1, 0x7);  /* D1 /7 */
+  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Arithmetic Shift Right by 8-bit immediate
+instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+  match(Set dst (RShiftI dst shift));
+  effect(KILL cr);
+
+  size(3);
+  format %{ "SAR    $dst,$shift" %}
+  opcode(0xC1, 0x7);  /* C1 /7 ib */
+  ins_encode( RegOpcImm( dst, shift ) );
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Arithmetic Shift Right by 8-bit immediate
+instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
+  effect(KILL cr);
+
+  format %{ "SAR    $dst,$shift" %}
+  opcode(0xC1, 0x7);  /* C1 /7 ib */
+  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Arithmetic Shift Right by variable
+instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+  match(Set dst (RShiftI dst shift));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SAR    $dst,$shift" %}
+  opcode(0xD3, 0x7);  /* D3 /7 */
+  ins_encode( OpcP, RegOpc( dst ) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+// Logical shift right by one
+instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+  match(Set dst (URShiftI dst shift));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SHR    $dst,$shift" %}
+  opcode(0xD1, 0x5);  /* D1 /5 */
+  ins_encode( OpcP, RegOpc( dst ) );
+  ins_pipe( ialu_reg );
+%}
+
+// Logical Shift Right by 8-bit immediate
+instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+  match(Set dst (URShiftI dst shift));
+  effect(KILL cr);
+
+  size(3);
+  format %{ "SHR    $dst,$shift" %}
+  opcode(0xC1, 0x5);  /* C1 /5 ib */
+  ins_encode( RegOpcImm( dst, shift) );
+  ins_pipe( ialu_reg );
+%}
+
+// Shift Left by 24, followed by Arithmetic Shift Right by 24.
+// This idiom is used by the compiler for the i2b bytecode.
+instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour, eFlagsReg cr) %{
+  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
+  effect(KILL cr);
+
+  size(3);
+  format %{ "MOVSX  $dst,$src :8" %}
+  opcode(0xBE, 0x0F);
+  ins_encode( OpcS, OpcP, RegReg( dst, src));
+  ins_pipe( ialu_reg_reg );
+%}
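+
+// Note: for any int x, (x << 24) >> 24 (arithmetic right shift) equals the
+// sign extension of the low byte of x, so the two-shift idiom collapses to a
+// single MOVSX; the i2s rule below does the same for the low 16 bits.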
+
+// Shift Left by 16, followed by Arithmetic Shift Right by 16.
+// This idiom is used by the compiler for the i2s bytecode.
+instruct i2s(eRegI dst, xRegI src, immI_16 sixteen, eFlagsReg cr) %{
+  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
+  effect(KILL cr);
+
+  size(3);
+  format %{ "MOVSX  $dst,$src :16" %}
+  opcode(0xBF, 0x0F);
+  ins_encode( OpcS, OpcP, RegReg( dst, src));
+  ins_pipe( ialu_reg_reg );
+%}
+
+
+// Logical Shift Right by variable
+instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+  match(Set dst (URShiftI dst shift));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "SHR    $dst,$shift" %}
+  opcode(0xD3, 0x5);  /* D3 /5 */
+  ins_encode( OpcP, RegOpc( dst ) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+
+//----------Logical Instructions-----------------------------------------------
+//----------Integer Logical Instructions---------------------------------------
+// And Instructions
+// And Register with Register
+instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "AND    $dst,$src" %}
+  opcode(0x23);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+// And Register with Immediate
+instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+
+  format %{ "AND    $dst,$src" %}
+  opcode(0x81,0x04);  /* Opcode 81 /4 */
+  // ins_encode( RegImm( dst, src) );
+  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
+  ins_pipe( ialu_reg );
+%}
+
+// And Register with Memory
+instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+  match(Set dst (AndI dst (LoadI src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "AND    $dst,$src" %}
+  opcode(0x23);
+  ins_encode( OpcP, RegMem( dst, src) );
+  ins_pipe( ialu_reg_mem );
+%}
+
+// And Memory with Register
+instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(150);
+  format %{ "AND    $dst,$src" %}
+  opcode(0x21);  /* Opcode 21 /r */
+  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+// And Memory with Immediate
+instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "AND    $dst,$src" %}
+  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
+  // ins_encode( MemImm( dst, src) );
+  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Or Instructions
+// Or Register with Register
+instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (OrI dst src));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "OR     $dst,$src" %}
+  opcode(0x0B);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+// Or Register with Immediate
+instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+  match(Set dst (OrI dst src));
+  effect(KILL cr);
+
+  format %{ "OR     $dst,$src" %}
+  opcode(0x81,0x01);  /* Opcode 81 /1 id */
+  // ins_encode( RegImm( dst, src) );
+  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
+  ins_pipe( ialu_reg );
+%}
+
+// Or Register with Memory
+instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+  match(Set dst (OrI dst (LoadI src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "OR     $dst,$src" %}
+  opcode(0x0B);
+  ins_encode( OpcP, RegMem( dst, src) );
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Or Memory with Register
+instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(150);
+  format %{ "OR     $dst,$src" %}
+  opcode(0x09);  /* Opcode 09 /r */
+  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+// Or Memory with Immediate
+instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "OR     $dst,$src" %}
+  opcode(0x81,0x1);  /* Opcode 81 /1 id */
+  // ins_encode( MemImm( dst, src) );
+  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Xor Instructions
+// Xor Register with Register
+instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (XorI dst src));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "XOR    $dst,$src" %}
+  opcode(0x33);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+// Xor Register with Immediate
+instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+  match(Set dst (XorI dst src));
+  effect(KILL cr);
+
+  format %{ "XOR    $dst,$src" %}
+  opcode(0x81,0x06);  /* Opcode 81 /6 id */
+  // ins_encode( RegImm( dst, src) );
+  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
+  ins_pipe( ialu_reg );
+%}
+
+// Xor Register with Memory
+instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+  match(Set dst (XorI dst (LoadI src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "XOR    $dst,$src" %}
+  opcode(0x33);
+  ins_encode( OpcP, RegMem(dst, src) );
+  ins_pipe( ialu_reg_mem );
+%}
+
+// Xor Memory with Register
+instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(150);
+  format %{ "XOR    $dst,$src" %}
+  opcode(0x31);  /* Opcode 31 /r */
+  ins_encode( OpcP, RegMem( src, dst ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+// Xor Memory with Immediate
+instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
+  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "XOR    $dst,$src" %}
+  opcode(0x81,0x6);  /* Opcode 81 /6 id */
+  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
+  ins_pipe( ialu_mem_imm );
+%}
+
+//----------Convert Int to Boolean---------------------------------------------
+
+instruct movI_nocopy(eRegI dst, eRegI src) %{
+  effect( DEF dst, USE src );
+  format %{ "MOV    $dst,$src" %}
+  ins_encode( enc_Copy( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
+  effect( USE_DEF dst, USE src, KILL cr );
+  
+  size(4);
+  format %{ "NEG    $dst\n\t"
+            "ADC    $dst,$src" %}
+  ins_encode( neg_reg(dst),
+              OpcRegReg(0x13,dst,src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
+  match(Set dst (Conv2B src));
+
+  expand %{
+    movI_nocopy(dst,src);
+    ci2b(dst,src,cr);
+  %}
+%}
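+
+// Note: the NEG/ADC pair above computes (src != 0) ? 1 : 0 without a branch.
+// After MOV dst,src:  NEG dst  sets dst = -src and CF = (src != 0);
+//                     ADC dst,src  then leaves (-src) + src + CF = CF in dst.
+// The pointer flavor (cp2b/convP2B below) uses the identical trick.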
+
+instruct movP_nocopy(eRegI dst, eRegP src) %{
+  effect( DEF dst, USE src );
+  format %{ "MOV    $dst,$src" %}
+  ins_encode( enc_Copy( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
+  effect( USE_DEF dst, USE src, KILL cr );
+  format %{ "NEG    $dst\n\t"
+            "ADC    $dst,$src" %}
+  ins_encode( neg_reg(dst),
+              OpcRegReg(0x13,dst,src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
+  match(Set dst (Conv2B src));
+
+  expand %{
+    movP_nocopy(dst,src);
+    cp2b(dst,src,cr);
+  %}
+%}
+
+instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
+  match(Set dst (CmpLTMask p q));
+  effect( KILL cr );
+  ins_cost(400);
+
+  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
+  format %{ "XOR    $dst,$dst\n\t"
+            "CMP    $p,$q\n\t"
+            "SETlt  $dst\n\t"
+            "NEG    $dst" %}
+  ins_encode( OpcRegReg(0x33,dst,dst),
+              OpcRegReg(0x3B,p,q),
+              setLT_reg(dst), neg_reg(dst) );
+  ins_pipe( pipe_slow );
+%}
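+
+// Note: CmpLTMask p q produces (p < q) ? -1 : 0 using a signed compare.  SETlt
+// only writes 0 or 1 into the low byte, so the NEG turns that into the
+// all-ones mask; the cmpLTMask0 rule below special-cases q == 0 as SAR by 31,
+// which just smears the sign bit of dst.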
+
+instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
+  match(Set dst (CmpLTMask dst zero));
+  effect( DEF dst, KILL cr );
+  ins_cost(100);
+
+  format %{ "SAR    $dst,31" %}
+  opcode(0xC1, 0x7);  /* C1 /7 ib */
+  ins_encode( RegOpcImm( dst, 0x1F ) );
+  ins_pipe( ialu_reg );
+%}
+
+
+instruct cadd_cmpLTMask1( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
+  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
+  effect( USE_KILL tmp, KILL cr );
+  ins_cost(400);
+  // annoyingly, $tmp has no edges so you can't ask for it in
+  // any format or encoding
+  format %{ "SUB    $p,$q\n\t"
+            "SBB    ECX,ECX\n\t"
+            "AND    ECX,$y\n\t"
+            "ADD    $p,ECX" %}
+  ins_encode( enc_cmpLTP(p,q,y,tmp) ); 
+  ins_pipe( pipe_cmplt );
+%}
+
+instruct cadd_cmpLTMask2( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
+  match(Set p (AddI (SubI p q) (AndI (CmpLTMask p q) y)));
+  effect( USE_KILL tmp, KILL cr );
+  ins_cost(400);
+
+  format %{ "SUB    $p,$q\n\t"
+            "SBB    ECX,ECX\n\t"
+            "AND    ECX,$y\n\t"
+            "ADD    $p,ECX" %}
+  ins_encode( enc_cmpLTP(p,q,y,tmp) ); 
+  ins_pipe( pipe_cmplt );
+%}
+
+/* Enabling these two encourages spilling in the inner loop of compress.
+instruct cadd_cmpLTMask1_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
+  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
+  effect( USE_KILL tmp, KILL cr );
+  ins_cost(400);
+
+  format %{ "SUB    $p,$q\n\t"
+            "SBB    ECX,ECX\n\t"
+            "AND    ECX,$y\n\t"
+            "ADD    $p,ECX" %}
+  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) ); 
+%}
+
+instruct cadd_cmpLTMask2_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
+  match(Set p (AddI (SubI p q) (AndI (CmpLTMask p q) (LoadI y))));
+  effect( USE_KILL tmp, KILL cr );
+  ins_cost(400);
+
+  format %{ "SUB    $p,$q\n\t"
+            "SBB    ECX,ECX\n\t"
+            "AND    ECX,$y\n\t"
+            "ADD    $p,ECX" %}
+  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) ); 
+%}
+*/
+
+//----------Long Instructions------------------------------------------------
+// Add Long Register with Register
+instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
+  match(Set dst (AddL dst src));
+  effect(KILL cr);
+  ins_cost(200);
+  format %{ "ADD    $dst.lo,$src.lo\n\t"
+            "ADC    $dst.hi,$src.hi" %}
+  opcode(0x03, 0x13);
+  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// Add Long Register with Immediate
+instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
+  match(Set dst (AddL dst src));
+  effect(KILL cr);
+  format %{ "ADD    $dst.lo,$src.lo\n\t"
+            "ADC    $dst.hi,$src.hi" %}
+  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
+  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Add Long Register with Memory
+instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
+  match(Set dst (AddL dst (LoadL mem)));
+  effect(KILL cr);
+  ins_cost(125);
+  format %{ "ADD    $dst.lo,$mem\n\t"
+            "ADC    $dst.hi,$mem+4" %}
+  opcode(0x03, 0x13);
+  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_pipe( ialu_reg_long_mem );
+%}
+
+// Subtract Long Register with Register.
+instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
+  match(Set dst (SubL dst src));
+  effect(KILL cr);
+  ins_cost(200);
+  format %{ "SUB    $dst.lo,$src.lo\n\t"
+            "SBB    $dst.hi,$src.hi" %}
+  opcode(0x2B, 0x1B);
+  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// Subtract Long Register with Immediate
+instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
+  match(Set dst (SubL dst src));
+  effect(KILL cr);
+  format %{ "SUB    $dst.lo,$src.lo\n\t"
+            "SBB    $dst.hi,$src.hi" %}
+  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
+  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Subtract Long Register with Memory
+instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
+  match(Set dst (SubL dst (LoadL mem)));
+  effect(KILL cr);
+  ins_cost(125);
+  format %{ "SUB    $dst.lo,$mem\n\t"
+            "SBB    $dst.hi,$mem+4" %}
+  opcode(0x2B, 0x1B);
+  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_pipe( ialu_reg_long_mem );
+%}
+
+instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
+  match(Set dst (SubL zero dst));
+  effect(KILL cr);
+  ins_cost(300);
+  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
+  ins_encode( neg_long(dst) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// And Long Register with Register
+instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  format %{ "AND    $dst.lo,$src.lo\n\t"
+            "AND    $dst.hi,$src.hi" %}
+  opcode(0x23,0x23);
+  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// And Long Register with Immediate
+instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  format %{ "AND    $dst.lo,$src.lo\n\t"
+            "AND    $dst.hi,$src.hi" %}
+  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
+  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// And Long Register with Memory
+instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
+  match(Set dst (AndL dst (LoadL mem)));
+  effect(KILL cr);
+  ins_cost(125);
+  format %{ "AND    $dst.lo,$mem\n\t"
+            "AND    $dst.hi,$mem+4" %}
+  opcode(0x23, 0x23);
+  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_pipe( ialu_reg_long_mem );
+%}
+
+// Or Long Register with Register
+instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
+  match(Set dst (OrL dst src));
+  effect(KILL cr);
+  format %{ "OR     $dst.lo,$src.lo\n\t"
+            "OR     $dst.hi,$src.hi" %}
+  opcode(0x0B,0x0B);
+  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// Or Long Register with Immediate
+instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
+  match(Set dst (OrL dst src));
+  effect(KILL cr);
+  format %{ "OR     $dst.lo,$src.lo\n\t"
+            "OR     $dst.hi,$src.hi" %}
+  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
+  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Or Long Register with Memory
+instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
+  match(Set dst (OrL dst (LoadL mem)));
+  effect(KILL cr);
+  ins_cost(125);
+  format %{ "OR     $dst.lo,$mem\n\t"
+            "OR     $dst.hi,$mem+4" %}
+  opcode(0x0B,0x0B);
+  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_pipe( ialu_reg_long_mem );
+%}
+
+// Xor Long Register with Register
+instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
+  match(Set dst (XorL dst src));
+  effect(KILL cr);
+  format %{ "XOR    $dst.lo,$src.lo\n\t"
+            "XOR    $dst.hi,$src.hi" %}
+  opcode(0x33,0x33);
+  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// Xor Long Register with Immediate
+instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
+  match(Set dst (XorL dst src));
+  effect(KILL cr);
+  format %{ "XOR    $dst.lo,$src.lo\n\t"
+            "XOR    $dst.hi,$src.hi" %}
+  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
+  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Xor Long Register with Memory
+instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
+  match(Set dst (XorL dst (LoadL mem)));
+  effect(KILL cr);
+  ins_cost(125);
+  format %{ "XOR    $dst.lo,$mem\n\t"
+            "XOR    $dst.hi,$mem+4" %}
+  opcode(0x33,0x33);
+  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
+  ins_pipe( ialu_reg_long_mem );
+%}
+
+// Shift Left Long by 1-31
+instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
+  match(Set dst (LShiftL dst cnt));
+  effect(KILL cr);
+  ins_cost(200);
+  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
+            "SHL    $dst.lo,$cnt" %}
+  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
+  ins_encode( move_long_small_shift(dst,cnt) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by 32-63
+instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
+  match(Set dst (LShiftL dst cnt));
+  effect(KILL cr);
+  ins_cost(300);
+  format %{ "MOV    $dst.hi,$dst.lo\n"
+          "\tSHL    $dst.hi,$cnt-32\n"
+          "\tXOR    $dst.lo,$dst.lo" %}
+  opcode(0xC1, 0x4);  /* C1 /4 ib */
+  ins_encode( move_long_big_shift_clr(dst,cnt) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Left Long by variable
+instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
+  match(Set dst (LShiftL dst shift));
+  effect(KILL cr);
+  ins_cost(500+200);
+  size(17);
+  format %{ "TEST   $shift,32\n\t"
+            "JEQ,s  small\n\t"
+            "MOV    $dst.hi,$dst.lo\n\t"
+            "XOR    $dst.lo,$dst.lo\n"
+    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
+            "SHL    $dst.lo,$shift" %}
+  ins_encode( shift_left_long( dst, shift ) );
+  ins_pipe( pipe_slow );
+%}
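+
+// Rough C sketch of the variable left shift above.  Only the low six bits of
+// the count matter (SHLD/SHL mask the count to five bits in hardware, and the
+// TEST against 32 covers bit 5):
+//   c = shift & 63;
+//   if (c & 32)      { hi = lo << (c & 31); lo = 0; }
+//   else if (c != 0) { hi = (hi << c) | (lo >> (32 - c)); lo <<= c; }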
+
+// Shift Right Long by 1-31
+instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
+  match(Set dst (URShiftL dst cnt));
+  effect(KILL cr);
+  ins_cost(200);
+  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
+            "SHR    $dst.hi,$cnt" %}
+  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
+  ins_encode( move_long_small_shift(dst,cnt) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Right Long by 32-63
+instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
+  match(Set dst (URShiftL dst cnt));
+  effect(KILL cr);
+  ins_cost(300);
+  format %{ "MOV    $dst.lo,$dst.hi\n"
+          "\tSHR    $dst.lo,$cnt-32\n"
+          "\tXOR    $dst.hi,$dst.hi" %}
+  opcode(0xC1, 0x5);  /* C1 /5 ib */
+  ins_encode( move_long_big_shift_clr(dst,cnt) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Right Long by variable
+instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
+  match(Set dst (URShiftL dst shift));
+  effect(KILL cr);
+  ins_cost(600);
+  size(17);
+  format %{ "TEST   $shift,32\n\t"
+            "JEQ,s  small\n\t"
+            "MOV    $dst.lo,$dst.hi\n\t"
+            "XOR    $dst.hi,$dst.hi\n"
+    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
+            "SHR    $dst.hi,$shift" %}
+  ins_encode( shift_right_long( dst, shift ) );
+  ins_pipe( pipe_slow );
+%}
+
+// Shift Right Long by 1-31
+instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
+  match(Set dst (RShiftL dst cnt));
+  effect(KILL cr);
+  ins_cost(200);
+  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
+            "SAR    $dst.hi,$cnt" %}
+  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
+  ins_encode( move_long_small_shift(dst,cnt) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Right Long by 32-63
+instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
+  match(Set dst (RShiftL dst cnt));
+  effect(KILL cr);
+  ins_cost(300);
+  format %{ "MOV    $dst.lo,$dst.hi\n"
+          "\tSAR    $dst.lo,$cnt-32\n"
+          "\tSAR    $dst.hi,31" %}
+  opcode(0xC1, 0x7);  /* C1 /7 ib */
+  ins_encode( move_long_big_shift_sign(dst,cnt) );
+  ins_pipe( ialu_reg_long );
+%}
+
+// Shift Right arithmetic Long by variable
+instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
+  match(Set dst (RShiftL dst shift));
+  effect(KILL cr);
+  ins_cost(600);
+  size(18);
+  format %{ "TEST   $shift,32\n\t"
+            "JEQ,s  small\n\t"
+            "MOV    $dst.lo,$dst.hi\n\t"
+            "SAR    $dst.hi,31\n"
+    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
+            "SAR    $dst.hi,$shift" %}
+  ins_encode( shift_right_arith_long( dst, shift ) );
+  ins_pipe( pipe_slow );
+%}
+
+
+//----------Double Instructions------------------------------------------------
+// Double Math
+
+// Compare & branch
+
+// P6 version of double compare, sets condition codes in EFLAGS
+instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI eax) %{
+  predicate(VM_Version::supports_cmov() && UseSSE <=1);
+  match(Set cr (CmpD src1 src2));
+  effect(KILL eax);
+  ins_cost(150);
+  format %{ "FLD    $src1\n\t"
+            "FUCOMIP ST,$src2  // P6 instruction\n\t"
+            "JNP    exit\n\t"
+            "MOV    ah,1       // saw a NaN, set CF\n\t"
+            "SAHF\n"
+     "exit:\tNOP               // avoid branch to branch" %}
+  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
+  ins_encode( Push_Reg_D(src1),
+              OpcP, RegOpc(src2),
+              cmpF_P6_fixup );
+  ins_pipe( pipe_slow );
+%}
+
+// Compare & branch
+instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI eax) %{
+  predicate(UseSSE<=1);
+  match(Set cr (CmpD src1 src2));
+  effect(KILL eax);
+  ins_cost(200);
+  format %{ "FLD    $src1\n\t"
+            "FCOMp  $src2\n\t"
+            "FNSTSW AX\n\t"
+            "TEST   AX,0x400\n\t"
+            "JZ,s   flags\n\t"
+            "MOV    AH,1\t# unordered treat as LT\n"
+    "flags:\tSAHF" %}
+  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
+  ins_encode( Push_Reg_D(src1),
+              OpcP, RegOpc(src2),
+              fpu_flags);
+  ins_pipe( pipe_slow );
+%}
+
+// Compare vs zero into -1,0,1
+instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI eax, eFlagsReg cr) %{
+  predicate(UseSSE<=1);
+  match(Set dst (CmpD3 src1 zero));
+  effect(KILL cr, KILL eax);
+  ins_cost(280);
+  format %{ "FTSTL  $dst,$src1" %}
+  opcode(0xE4, 0xD9); 
+  ins_encode( Push_Reg_D(src1),
+              OpcS, OpcP, PopFPU,
+              CmpF_Result(dst));
+  ins_pipe( pipe_slow );
+%}
+
+// Compare into -1,0,1
+instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI eax, eFlagsReg cr) %{
+  predicate(UseSSE<=1);
+  match(Set dst (CmpD3 src1 src2));
+  effect(KILL cr, KILL eax);
+  ins_cost(300);
+  format %{ "FCMPL  $dst,$src1,$src2" %}
+  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
+  ins_encode( Push_Reg_D(src1),
+              OpcP, RegOpc(src2),
+              CmpF_Result(dst));
+  ins_pipe( pipe_slow );
+%}
+
+// double compare and set condition codes in EFLAGS by XMM regs
+instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI eax) %{
+  predicate(UseSSE==2);
+  match(Set cr (CmpD dst src));
+  effect(KILL eax);
+  ins_cost(145);
+  format %{ "COMISD $dst,$src\n"
+          "\tJNP    exit\n"
+          "\tMOV    ah,1       // saw a NaN, set CF\n"
+          "\tSAHF\n"
+     "exit:\tNOP               // avoid branch to branch" %}
+  opcode(0x66, 0x0F, 0x2F);
+  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
+  ins_pipe( pipe_slow );
+%}
+
+// double compare and set condition codes in EFLAGS by XMM regs
+instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI eax) %{
+  predicate(UseSSE==2);
+  match(Set cr (CmpD dst (LoadD src)));
+  effect(KILL eax);
+  ins_cost(145);
+  format %{ "COMISD $dst,$src\n"
+          "\tJNP    exit\n"
+          "\tMOV    ah,1       // saw a NaN, set CF\n"
+          "\tSAHF\n"
+     "exit:\tNOP               // avoid branch to branch" %}
+  opcode(0x66, 0x0F, 0x2F);
+  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
+  ins_pipe( pipe_slow );
+%}
+
+// Compare into -1,0,1 in XMM
+instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
+  predicate(UseSSE==2);
+  match(Set dst (CmpD3 src1 src2));
+  effect(KILL cr);
+  ins_cost(275);
+  format %{ "XOR    $dst,$dst\n"
+          "\tCOMISD $src1,$src2\n"
+          "\tJP,s   nan\n"
+          "\tJEQ,s  exit\n"
+          "\tJA,s   inc\n"
+      "nan:\tDEC    $dst\n"
+          "\tJMP,s  exit\n"
+      "inc:\tINC    $dst\n"
+      "exit:"
+                %}
+  opcode(0x66, 0x0F, 0x2F);
+  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2), 
+             CmpX_Result(dst));
+  ins_pipe( pipe_slow );
+%}
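+
+// The flag-to-integer sequence above follows Java's dcmpl convention:
+//   +1 if src1 > src2, 0 if equal, -1 if src1 < src2 or either input is NaN
+// (COMISD reports an unordered compare through PF, which the JP,s test picks
+// up and routes to the "nan" label).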
+
+// Compare into -1,0,1 in XMM and memory
+instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
+  predicate(UseSSE==2);
+  match(Set dst (CmpD3 src1 (LoadD mem)));
+  effect(KILL cr);
+  ins_cost(275);
+  format %{ "COMISD $src1,$mem\n"
+          "\tMOV    $dst,0\t\t# do not blow flags\n"
+          "\tJP,s   nan\n"
+          "\tJEQ,s  exit\n"
+          "\tJA,s   inc\n"
+      "nan:\tDEC    $dst\n"
+          "\tJMP,s  exit\n"
+      "inc:\tINC    $dst\n"
+      "exit:"
+                %}
+  opcode(0x66, 0x0F, 0x2F);
+  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
+             LdImmI(dst,0x0), CmpX_Result(dst));
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct subD_reg(regD dst, regD src) %{
+  predicate (UseSSE <=1);
+  match(Set dst (SubD dst src));
+
+  format %{ "FLD    $src\n\t"
+            "DSUBp  $dst,ST" %}
+  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
+  ins_cost(150);
+  ins_encode( Push_Reg_D(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+  predicate (UseSSE <=1);
+  match(Set dst (RoundDouble (SubD src1 src2)));
+  ins_cost(250);
+
+  format %{ "FLD    $src2\n\t"
+            "DSUB   ST,$src1\n\t"
+            "FSTP_D $dst\t# D-round" %}
+  opcode(0xD8, 0x5); 
+  ins_encode( Push_Reg_D(src2),
+              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+  ins_pipe( fpu_mem_reg_reg );
+%}
+
+
+instruct subD_reg_mem(regD dst, memory src) %{
+  predicate (UseSSE <=1);
+  match(Set dst (SubD dst (LoadD src)));
+  ins_cost(150);
+
+  format %{ "FLD    $src\n\t"
+            "DSUBp  $dst,ST" %}
+  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+instruct absD_reg(regDPR1 dst, regDPR1 src) %{
+  predicate (UseSSE<=1);
+  match(Set dst (AbsD src));
+  ins_cost(100);
+  format %{ "FABS" %}
+  opcode(0xE1, 0xD9);
+  ins_encode( OpcS, OpcP );
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct absXD_reg( regXD dst ) %{
+  predicate(UseSSE==2);
+  match(Set dst (AbsD dst));
+  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
+  ins_encode( AbsXD_encoding(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct negD_reg(regDPR1 dst, regDPR1 src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (NegD src));
+  ins_cost(100);
+  format %{ "FCHS" %}
+  opcode(0xE0, 0xD9);
+  ins_encode( OpcS, OpcP );
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct negXD_reg( regXD dst ) %{
+  predicate(UseSSE==2);
+  match(Set dst (NegD dst));
+  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
+  ins_encode( NegXD_encoding(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct addD_reg(regD dst, regD src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (AddD dst src));
+  format %{ "FLD    $src\n\t"
+            "DADD   $dst,ST" %}
+  size(4);
+  ins_cost(150);
+  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
+  ins_encode( Push_Reg_D(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+
+instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+  predicate(UseSSE<=1);
+  match(Set dst (RoundDouble (AddD src1 src2)));
+  ins_cost(250);
+
+  format %{ "FLD    $src2\n\t"
+            "DADD   ST,$src1\n\t"
+            "FSTP_D $dst\t# D-round" %}
+  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
+  ins_encode( Push_Reg_D(src2),
+              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+  ins_pipe( fpu_mem_reg_reg );
+%}
+
+
+instruct addD_reg_mem(regD dst, memory src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (AddD dst (LoadD src)));
+  ins_cost(150);
+
+  format %{ "FLD    $src\n\t"
+            "DADDp  $dst,ST" %}
+  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+// add-to-memory
+instruct addD_mem_reg(memory dst, regD src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
+  ins_cost(150);
+
+  format %{ "FLD_D  $dst\n\t"
+            "DADD   ST,$src\n\t"
+            "FST_D  $dst" %}
+  opcode(0xDD, 0x0);
+  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
+              Opcode(0xD8), RegOpc(src),
+              set_instruction_start,
+              Opcode(0xDD), RMopc_Mem(0x03,dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+instruct addD_reg_imm1(regD dst, immD1 src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (AddD dst src));
+  ins_cost(125);
+  format %{ "FLD1\n\t"
+            "DADDp  $dst,ST" %}
+  opcode(0xDE, 0x00);       
+  ins_encode( LdImmD(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg );
+%}
+
+instruct addD_reg_imm(regD dst, immD src) %{
+  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
+  match(Set dst (AddD dst src));
+  ins_cost(200);
+  format %{ "FLD_D  [$src]\n\t"
+            "DADDp  $dst,ST" %}
+  opcode(0xDE, 0x00);       /* DE /0 */
+  ins_encode( LdImmD(src),
+              OpcP, RegOpc(dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
+  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
+  match(Set dst (RoundDouble (AddD src con)));
+  ins_cost(200);
+  format %{ "FLD_D  [$con]\n\t"
+            "DADD   ST,$src\n\t"
+            "FSTP_D $dst\t# D-round" %}
+  opcode(0xD8, 0x00);       /* D8 /0 */
+  ins_encode( LdImmD(con),
+              OpcP, RegOpc(src), Pop_Mem_D(dst));
+  ins_pipe( fpu_mem_reg_con );
+%}
+
+// Add two double precision floating point values in xmm
+instruct addXD_reg(regXD dst, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set dst (AddD dst src));
+  format %{ "ADDSD  $dst,$src" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct addXD_imm(regXD dst, immXD con) %{
+  predicate(UseSSE==2);
+  match(Set dst (AddD dst con));
+  format %{ "ADDSD  $dst,[$con]" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct addXD_mem(regXD dst, memory mem) %{
+  predicate(UseSSE==2);
+  match(Set dst (AddD dst (LoadD mem)));
+  format %{ "ADDSD  $dst,$mem" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Sub two double precision floating point values in xmm
+instruct subXD_reg(regXD dst, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set dst (SubD dst src));
+  format %{ "SUBSD  $dst,$src" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct subXD_imm(regXD dst, immXD con) %{
+  predicate(UseSSE==2);
+  match(Set dst (SubD dst con));
+  format %{ "SUBSD  $dst,[$con]" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct subXD_mem(regXD dst, memory mem) %{
+  predicate(UseSSE==2);
+  match(Set dst (SubD dst (LoadD mem)));
+  format %{ "SUBSD  $dst,$mem" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Mul two double precision floating point values in xmm
+instruct mulXD_reg(regXD dst, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set dst (MulD dst src));
+  format %{ "MULSD  $dst,$src" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct mulXD_imm(regXD dst, immXD con) %{
+  predicate(UseSSE==2);
+  match(Set dst (MulD dst con));
+  format %{ "MULSD  $dst,[$con]" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct mulXD_mem(regXD dst, memory mem) %{
+  predicate(UseSSE==2);
+  match(Set dst (MulD dst (LoadD mem)));
+  format %{ "MULSD  $dst,$mem" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Div two double precision floating point values in xmm
+instruct divXD_reg(regXD dst, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set dst (DivD dst src));
+  format %{ "DIVSD  $dst,$src" %}
+  opcode(0xF2, 0x0F, 0x5E); 
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct divXD_imm(regXD dst, immXD con) %{
+  predicate(UseSSE==2);
+  match(Set dst (DivD dst con));
+  format %{ "DIVSD  $dst,[$con]" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con));
+  ins_pipe( pipe_slow );
+%}
+
+instruct divXD_mem(regXD dst, memory mem) %{
+  predicate(UseSSE==2);
+  match(Set dst (DivD dst (LoadD mem)));
+  format %{ "DIVSD  $dst,$mem" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct mulD_reg(regD dst, regD src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (MulD dst src));
+  format %{ "FLD    $src\n\t"
+            "DMULp  $dst,ST" %}
+  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
+  ins_cost(150);
+  ins_encode( Push_Reg_D(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Strict FP instruction biases argument before multiply then 
+// biases result to avoid double rounding of subnormals.
+// 
+// scale arg1 by multiplying arg1 by 2^(-15360)
+// load arg2
+// multiply scaled arg1 by arg2
+// rescale product by 2^(15360)
+// 
+instruct strictfp_mulD_reg(regD dst, regD src) %{
+  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
+  match(Set dst (MulD dst src));
+  ins_cost(1);   // Select this instruction for all strict FP double multiplies
+
+  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
+            "DMULp  $dst,ST\n\t"
+            "FLD    $src\n\t"
+            "DMULp  $dst,ST\n\t"
+            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
+            "DMULp  $dst,ST\n\t" %}
+  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
+  ins_encode( strictfp_bias1(dst),
+              Push_Reg_D(src),
+              OpcP, RegOpc(dst),
+              strictfp_bias2(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
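+
+// Written out, the strict multiply above evaluates
+//   result = ((dst * 2^-15360) * src) * 2^+15360
+// using the bias constants named in the format string: the down-scale makes
+// products that would be subnormal doubles underflow and round once inside
+// the x87, and the closing power-of-two rescale of an in-range result is
+// exact, which is how the double rounding mentioned above is avoided.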
+
+instruct mulD_reg_imm(regD dst, immD src) %{
+  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
+  match(Set dst (MulD dst src));
+  ins_cost(200);
+  format %{ "FLD_D  [$src]\n\t"
+            "DMULp  $dst,ST" %}
+  opcode(0xDE, 0x1); /* DE /1 */
+  ins_encode( LdImmD(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+
+instruct mulD_reg_mem(regD dst, memory src) %{
+  predicate( UseSSE<=1 );
+  match(Set dst (MulD dst (LoadD src)));
+  ins_cost(200);
+  format %{ "FLD_D  $src\n\t"
+            "DMULp  $dst,ST" %}
+  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+// 
+// Cisc-alternate to reg-reg multiply
+instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
+  predicate( UseSSE<=1 );
+  match(Set dst (MulD src (LoadD mem)));
+  ins_cost(250);
+  format %{ "FLD_D  $mem\n\t"
+            "DMUL   ST,$src\n\t"
+            "FSTP_D $dst" %}
+  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
+              OpcReg_F(src),
+              Pop_Reg_D(dst) );
+  ins_pipe( fpu_reg_reg_mem );
+%}
+
+
+// MACRO3 -- addD a mulD
+// This instruction is a '2-address' instruction in that the result goes 
+// back to src2.  This eliminates a move from the macro; possibly the 
+// register allocator will have to add it back (and maybe not).
+instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
+  predicate( UseSSE<=1 );
+  match(Set src2 (AddD (MulD src0 src1) src2));
+  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
+            "DMUL   ST,$src1\n\t"
+            "DADDp  $src2,ST" %}
+  ins_cost(250);
+  opcode(0xDD); /* LoadD DD /0 */
+  ins_encode( Push_Reg_F(src0),
+              FMul_ST_reg(src1),
+              FAddP_reg_ST(src2) );
+  ins_pipe( fpu_reg_reg_reg );
+%}
+
+
+// MACRO3 -- subD a mulD
+instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
+  predicate( UseSSE<=1 );
+  match(Set src2 (SubD (MulD src0 src1) src2));
+  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
+            "DMUL   ST,$src1\n\t"
+            "DSUBRp $src2,ST" %}
+  ins_cost(250);
+  ins_encode( Push_Reg_F(src0),
+              FMul_ST_reg(src1),
+              Opcode(0xDE), Opc_plus(0xE0,src2));
+  ins_pipe( fpu_reg_reg_reg );
+%}
+
+
+instruct divD_reg(regD dst, regD src) %{
+  predicate( UseSSE<=1 );
+  match(Set dst (DivD dst src));
+
+  format %{ "FLD    $src\n\t"
+            "FDIVp  $dst,ST" %}
+  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
+  ins_cost(150);
+  ins_encode( Push_Reg_D(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Strict FP instruction biases argument before division then 
+// biases result, to avoid double rounding of subnormals.
+// 
+// scale dividend by multiplying dividend by 2^(-15360)
+// load divisor
+// divide scaled dividend by divisor
+// rescale quotient by 2^(15360)
+// 
+instruct strictfp_divD_reg(regD dst, regD src) %{
+  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
+  match(Set dst (DivD dst src));
+  ins_cost(1);   // Select this instruction for all strict FP double divides
+
+  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
+            "DMULp  $dst,ST\n\t"
+            "FLD    $src\n\t"
+            "FDIVp  $dst,ST\n\t"
+            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
+            "DMULp  $dst,ST\n\t" %}
+  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
+  ins_encode( strictfp_bias1(dst),
+              Push_Reg_D(src),
+              OpcP, RegOpc(dst),
+              strictfp_bias2(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
+  match(Set dst (RoundDouble (DivD src1 src2)));
+
+  format %{ "FLD    $src1\n\t"
+            "FDIV   ST,$src2\n\t"
+            "FSTP_D $dst\t# D-round" %}
+  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
+  ins_encode( Push_Reg_D(src1),
+              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
+  ins_pipe( fpu_mem_reg_reg );
+%}
+
+
+instruct modD_reg(regD dst, regD src, eAXRegI eax, eFlagsReg cr) %{
+  predicate(UseSSE<=1);
+  match(Set dst (ModD dst src));
+  effect(KILL eax, KILL cr); // emitModD() uses EAX and EFLAGS
+
+  format %{ "DMOD   $dst,$src" %}
+  ins_cost(250);
+  ins_encode(Push_Reg_Mod_D(dst, src),
+              emitModD(),
+              Push_Result_Mod_D(src),
+              Pop_Reg_D(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI eax, regFPR1 tmp, eFlagsReg cr) %{
+  predicate(UseSSE==2);
+  match(Set dst (ModD src0 src1));
+  effect(KILL eax, KILL tmp, KILL cr);
+
+  format %{ "SUB    ESP,8\n"
+          "\tMOVSD  [ESP+0],$src1\n"
+          "\tFPOP\n"
+          "\tFLD_D  [ESP+0]\n"
+          "\tMOVSD  [ESP+0],$src0\n"
+          "\tFLD_D  [ESP+0]\n"
+     "loop:\tFPREM\n"
+          "\tFWAIT\n"
+          "\tFNSTSW AX\n"
+          "\tSAHF\n"
+          "\tJP     loop\n"
+          "\tFSTP_D [ESP+0]\n"
+          "\tMOVSD  $dst,[ESP+0]\n"
+          "\tADD    ESP,8"
+    %}
+  ins_cost(250);
+  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst));
+  ins_pipe( pipe_slow );
+%}
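+
+// FPREM can leave only a partial remainder when the exponents are far apart
+// and reports this through the C2 status bit; FNSTSW AX / SAHF map C2 onto
+// PF, so the "JP loop" above repeats the reduction until the remainder is
+// final.  modX_reg further below uses the same loop.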
+
+instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
+  predicate (UseSSE<=1);
+  match(Set dst (SinD src));
+  ins_cost(1800);
+  format %{ "DSIN" %}
+  opcode(0xD9, 0xFE);
+  ins_encode( OpcP, OpcS );
+  ins_pipe( pipe_slow );
+%}
+
+instruct sinXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+  predicate (UseSSE==2);
+  match(Set dst (SinD src));
+  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  ins_cost(1800);
+  format %{ "DSIN" %}
+  opcode(0xD9, 0xFE);
+  ins_encode( Push_SrcXD(src), OpcP, OpcS, Push_ResultXD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
+  predicate (UseSSE<=1);
+  match(Set dst (CosD src));
+  ins_cost(1800);
+  format %{ "DCOS" %}
+  opcode(0xD9, 0xFF);
+  ins_encode( OpcP, OpcS );
+  ins_pipe( pipe_slow );
+%}
+
+instruct cosXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+  predicate (UseSSE==2);
+  match(Set dst (CosD src));
+  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  ins_cost(1800);
+  format %{ "DCOS" %}
+  opcode(0xD9, 0xFF);
+  ins_encode( Push_SrcXD(src), OpcP, OpcS, Push_ResultXD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct tanD_reg(regD dst, regD src) %{
+  predicate (UseSSE<=1);
+  match(Set dst(TanD src));
+  format %{ "DTAN   $dst,$src" %}
+  opcode(0xD9, 0xF2);
+  ins_encode( Push_Reg_D(src),
+              OpcP, OpcS, Pop_Reg_D(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct tanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+  predicate (UseSSE==2);
+  match(Set dst(TanD src));
+  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  format %{ "DTAN   $dst,$src" %}
+  opcode(0xD9, 0xF2);
+  ins_encode( Push_SrcXD(src),
+              OpcP, OpcS, Push_ResultXD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct atanD_reg(regD dst, regD src) %{
+  predicate (UseSSE<=1);
+  match(Set dst(AtanD dst src));
+  format %{ "DATA   $dst,$src" %}
+  opcode(0xD9, 0xF3);
+  ins_encode( Push_Reg_D(src),
+              OpcP, OpcS, RegOpc(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+  predicate (UseSSE==2);
+  match(Set dst(AtanD dst src));
+  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  format %{ "DATA   $dst,$src" %}
+  opcode(0xD9, 0xF3);
+  ins_encode( Push_SrcXD(src),
+              OpcP, OpcS, Push_ResultXD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct sqrtD_reg(regD dst, regD src) %{
+  predicate (UseSSE<=1);
+  match(Set dst (SqrtD src));
+  format %{ "DSQRT  $dst,$src" %}
+  opcode(0xFA, 0xD9);
+  ins_encode( Push_Reg_D(src),
+              OpcS, OpcP, Pop_Reg_D(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct powD_reg(regDPR1 X, regDPR2 Y) %{
+  match(Set X (PowD X Y));
+  effect(KILL Y);
+  format %{ "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
+            "FDUP   \t\t\t# Q Q\n\t"
+            "FRNDINT\t\t\t# int(Q) Q\n\t"
+            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
+            "FXCH   ST(1)\t\t# frac(Q) int(Q)\n\t"
+            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
+            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
+            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADDP [1.000] instead
+            "FSCALE \t\t\t# 2^int(Q)*2^frac(Q)=2^Q int(Q)"
+             %}
+  ins_encode( Opcode(0xD9), Opcode(0xF1),   // fyl2x
+              Opcode(0xD9), Opcode(0xC0),   // fdup = fld st(0)
+              Opcode(0xD9), Opcode(0xFC),   // frndint
+              Opcode(0xDC), Opcode(0xE9),   // fsub st(1) -= st(0)
+              Opcode(0xD9), Opcode(0xC9),   // fxch st(1)
+              Opcode(0xD9), Opcode(0xF0),   // f2xm1
+              Opcode(0xD9), Opcode(0xE8),   // fld1
+              Opcode(0xDE), Opcode(0xC1),   // faddp
+              Opcode(0xD9), Opcode(0xFD) ); // fscale
+  ins_pipe( pipe_slow );
+%}
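+
+// The sequence computes X^Y as 2^(Y*log2(X)), with Q = Y*log2(X) from FYL2X:
+//   X^Y = 2^Q = 2^int(Q) * 2^frac(Q)
+// F2XM1 only accepts arguments in (-1,1), hence the FRNDINT/FSUB split of Q
+// into integer and fractional parts, which FSCALE recombines at the end.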
+
+//-------------Float Instructions-------------------------------
+// Float Math
+
+// Code for float compare:
+//     fcompp();
+//     fwait(); fnstsw_ax();
+//     sahf();
+//     movl(dst, unordered_result);
+//     jcc(Assembler::parity, exit);
+//     movl(dst, less_result);
+//     jcc(Assembler::below, exit);
+//     movl(dst, equal_result);
+//     jcc(Assembler::equal, exit);
+//     movl(dst, greater_result);
+//   exit:
+
+// P6 version of float compare, sets condition codes in EFLAGS
+instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI eax) %{
+  predicate(VM_Version::supports_cmov() && UseSSE == 0);
+  match(Set cr (CmpF src1 src2));
+  effect(KILL eax);
+  ins_cost(150);
+  format %{ "FLD    $src1\n\t"
+            "FUCOMIP ST,$src2  // P6 instruction\n\t"
+            "JNP    exit\n\t"
+            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
+            "SAHF\n"
+     "exit:\tNOP               // avoid branch to branch" %}
+  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
+  ins_encode( Push_Reg_D(src1),
+              OpcP, RegOpc(src2),
+              cmpF_P6_fixup );
+  ins_pipe( pipe_slow );
+%}
+
+
+// Compare & branch
+instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI eax) %{
+  predicate(UseSSE == 0);
+  match(Set cr (CmpF src1 src2));
+  effect(KILL eax);
+  ins_cost(200);
+  format %{ "FLD    $src1\n\t"
+            "FCOMp  $src2\n\t"
+            "FNSTSW AX\n\t"
+            "TEST   AX,0x400\n\t"
+            "JZ,s   flags\n\t"
+            "MOV    AH,1\t# unordered treat as LT\n"
+    "flags:\tSAHF" %}
+  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
+  ins_encode( Push_Reg_D(src1),
+              OpcP, RegOpc(src2),
+              fpu_flags);
+  ins_pipe( pipe_slow );
+%}
+
+// Compare vs zero into -1,0,1
+instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI eax, eFlagsReg cr) %{
+  predicate(UseSSE == 0);
+  match(Set dst (CmpF3 src1 zero));
+  effect(KILL cr, KILL eax);
+  ins_cost(280);
+  format %{ "FTSTL  $dst,$src1" %}
+  opcode(0xE4, 0xD9); 
+  ins_encode( Push_Reg_D(src1),
+              OpcS, OpcP, PopFPU,
+              CmpF_Result(dst));
+  ins_pipe( pipe_slow );
+%}
+
+// Compare into -1,0,1
+instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI eax, eFlagsReg cr) %{
+  predicate(UseSSE == 0);
+  match(Set dst (CmpF3 src1 src2));
+  effect(KILL cr, KILL eax);
+  ins_cost(300);
+  format %{ "FCMPL  $dst,$src1,$src2" %}
+  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
+  ins_encode( Push_Reg_D(src1),
+              OpcP, RegOpc(src2),
+              CmpF_Result(dst));
+  ins_pipe( pipe_slow );
+%}
+
+// float compare and set condition codes in EFLAGS by XMM regs
+instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI eax) %{
+  predicate(UseSSE>=1);
+  match(Set cr (CmpF dst src));
+  effect(KILL eax);
+  ins_cost(145);
+  format %{ "COMISS $dst,$src\n"
+          "\tJNP    exit\n"
+          "\tMOV    ah,1       // saw a NaN, set CF\n"
+          "\tSAHF\n"
+     "exit:\tNOP               // avoid branch to branch" %}
+  opcode(0x0F, 0x2F);
+  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
+  ins_pipe( pipe_slow );
+%}
+
+// float compare and set condition codes in EFLAGS by XMM regs
+instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI eax) %{
+  predicate(UseSSE>=1);
+  match(Set cr (CmpF dst (LoadF src)));
+  effect(KILL eax);
+  ins_cost(145);
+  format %{ "COMISS $dst,$src\n"
+          "\tJNP    exit\n"
+          "\tMOV    ah,1       // saw a NaN, set CF\n"
+          "\tSAHF\n"
+     "exit:\tNOP               // avoid branch to branch" %}
+  opcode(0x0F, 0x2F);
+  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
+  ins_pipe( pipe_slow );
+%}
+
+// Compare into -1,0,1 in XMM
+instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
+  predicate(UseSSE>=1);
+  match(Set dst (CmpF3 src1 src2));
+  effect(KILL cr);
+  ins_cost(275);
+  format %{ "XOR    $dst,$dst\n"
+          "\tCOMISS $src1,$src2\n"
+          "\tJP,s   nan\n"
+          "\tJEQ,s  exit\n"
+          "\tJA,s   inc\n"
+      "nan:\tDEC    $dst\n"
+          "\tJMP,s  exit\n"
+      "inc:\tINC    $dst\n"
+      "exit:"
+                %}
+  opcode(0x0F, 0x2F);
+  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
+  ins_pipe( pipe_slow );
+%}
+
+// Compare into -1,0,1 in XMM and memory
+instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
+  predicate(UseSSE>=1);
+  match(Set dst (CmpF3 src1 (LoadF mem)));
+  effect(KILL cr);
+  ins_cost(275);
+  format %{ "COMISS $src1,$mem\n"
+          "\tMOV    $dst,0\t\t# do not blow flags\n"
+          "\tJP,s   nan\n"
+          "\tJEQ,s  exit\n"
+          "\tJA,s   inc\n"
+      "nan:\tDEC    $dst\n"
+          "\tJMP,s  exit\n"
+      "inc:\tINC    $dst\n"
+      "exit:"
+                %}
+  opcode(0x0F, 0x2F);
+  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
+  ins_pipe( pipe_slow );
+%}
+
+// Spill to obtain 24-bit precision
+instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (SubF src1 src2));
+
+  format %{ "FSUB   $dst,$src1 - $src2" %}
+  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
+  ins_encode( Push_Reg_F(src1),
+              OpcReg_F(src2),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_reg_reg );
+%}
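+
+// The 24-bit spill forms implement, in effect, dst = (float)(src1 OP src2):
+// storing through FSTP_S rounds to a 24-bit significand regardless of the
+// FPU precision-control setting, while the non-spilling twins keep whatever
+// extra precision the x87 register holds.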
+// 
+// This instruction does not round to 24-bits
+instruct subF_reg(regF dst, regF src) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (SubF dst src));
+
+  format %{ "FSUB   $dst,$src" %}
+  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
+  ins_encode( Push_Reg_F(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Spill to obtain 24-bit precision
+instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF src1 src2));
+
+  format %{ "FADD   $dst,$src1,$src2" %}
+  opcode(0xD8, 0x0); /* D8 C0+i */
+  ins_encode( Push_Reg_F(src2),
+              OpcReg_F(src1),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_reg_reg );
+%}
+// 
+// This instruction does not round to 24-bits
+instruct addF_reg(regF dst, regF src) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF dst src));
+
+  format %{ "FLD    $src\n\t"
+            "FADDp  $dst,ST" %}
+  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
+  ins_encode( Push_Reg_F(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Add two single precision floating point values in xmm
+instruct addX_reg(regX dst, regX src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (AddF dst src));
+  format %{ "ADDSS  $dst,$src" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct addX_imm(regX dst, immXF con) %{
+  predicate(UseSSE>=1);
+  match(Set dst (AddF dst con));
+  format %{ "ADDSS  $dst,[$con]" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct addX_mem(regX dst, memory mem) %{
+  predicate(UseSSE>=1);
+  match(Set dst (AddF dst (LoadF mem)));
+  format %{ "ADDSS  $dst,$mem" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Subtract two single precision floating point values in xmm
+instruct subX_reg(regX dst, regX src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (SubF dst src));
+  format %{ "SUBSS  $dst,$src" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct subX_imm(regX dst, immXF con) %{
+  predicate(UseSSE>=1);
+  match(Set dst (SubF dst con));
+  format %{ "SUBSS  $dst,[$con]" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct subX_mem(regX dst, memory mem) %{
+  predicate(UseSSE>=1);
+  match(Set dst (SubF dst (LoadF mem)));
+  format %{ "SUBSS  $dst,$mem" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Multiply two single precision floating point values in xmm
+instruct mulX_reg(regX dst, regX src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (MulF dst src));
+  format %{ "MULSS  $dst,$src" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct mulX_imm(regX dst, immXF con) %{
+  predicate(UseSSE>=1);
+  match(Set dst (MulF dst con));
+  format %{ "MULSS  $dst,[$con]" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct mulX_mem(regX dst, memory mem) %{
+  predicate(UseSSE>=1);
+  match(Set dst (MulF dst (LoadF mem)));
+  format %{ "MULSS  $dst,$mem" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Divide two single precision floating point values in xmm
+instruct divX_reg(regX dst, regX src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (DivF dst src));
+  format %{ "DIVSS  $dst,$src" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct divX_imm(regX dst, immXF con) %{
+  predicate(UseSSE>=1);
+  match(Set dst (DivF dst con));
+  format %{ "DIVSS  $dst,[$con]" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct divX_mem(regX dst, memory mem) %{
+  predicate(UseSSE>=1);
+  match(Set dst (DivF dst (LoadF mem)));
+  format %{ "DIVSS  $dst,$mem" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
+  ins_pipe( pipe_slow );
+%}
+
+// Get the square root of a double precision floating point value in xmm
+instruct sqrtXD_reg(regXD dst, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set dst (SqrtD src));
+  format %{ "SQRTSD $dst,$src" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct sqrtXD_mem(regXD dst, memory mem) %{
+  predicate(UseSSE==2);
+  match(Set dst (SqrtD (LoadD mem)));
+  format %{ "SQRTSD $dst,$mem" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
+  ins_pipe( pipe_slow );
+%}
+
+instruct absF_reg(regFPR1 dst, regFPR1 src) %{
+  predicate(UseSSE==0);
+  match(Set dst (AbsF src));
+  ins_cost(100);
+  format %{ "FABS" %}
+  opcode(0xE1, 0xD9);
+  ins_encode( OpcS, OpcP );
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct absX_reg(regX dst ) %{
+  predicate(UseSSE>=1);
+  match(Set dst (AbsF dst));
+  format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
+  ins_encode( AbsXF_encoding(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct negF_reg(regFPR1 dst, regFPR1 src) %{
+  predicate(UseSSE==0);
+  match(Set dst (NegF src));
+  ins_cost(100);
+  format %{ "FCHS" %}
+  opcode(0xE0, 0xD9);
+  ins_encode( OpcS, OpcP );
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct negX_reg( regX dst ) %{
+  predicate(UseSSE>=1);
+  match(Set dst (NegF dst));
+  format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
+  ins_encode( NegXF_encoding(dst));
+  ins_pipe( pipe_slow );
+%}
+
+// Cisc-alternate to addF_reg
+// Spill to obtain 24-bit precision
+instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF src1 (LoadF src2)));
+
+  format %{ "FLD    $src2\n\t"
+            "FADD   ST,$src1\n\t"
+            "FSTP_S $dst" %}
+  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+              OpcReg_F(src1),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_reg_mem );
+%}
+// 
+// Cisc-alternate to addF_reg
+// This instruction does not round to 24-bits
+instruct addF_reg_mem(regF dst, memory src) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF dst (LoadF src)));
+
+  format %{ "FADD   $dst,$src" %}
+  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_mem );
+%}
+
+// Following two instructions for _222_mpegaudio
+// Spill to obtain 24-bit precision
+instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF src1 src2));
+
+  format %{ "FADD   $dst,$src1,$src2" %}
+  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
+              OpcReg_F(src2),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_reg_mem );
+%}
+
+// Cisc-spill variant
+// Spill to obtain 24-bit precision
+instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF src1 (LoadF src2)));
+
+  format %{ "FADD   $dst,$src1,$src2 cisc" %}
+  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+              set_instruction_start,
+              OpcP, RMopc_Mem(secondary,src1),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_mem_mem );
+%}
+
+// Spill to obtain 24-bit precision
+instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF src1 src2));
+
+  format %{ "FADD   $dst,$src1,$src2" %}
+  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+              set_instruction_start,
+              OpcP, RMopc_Mem(secondary,src1),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_mem_mem );
+%}
+
+
+// Spill to obtain 24-bit precision
+instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF src1 src2));
+  format %{ "FLD    $src1\n\t"
+            "FADD   $src2\n\t"
+            "FSTP_S $dst"  %}
+  opcode(0xD8, 0x00);       /* D8 /0 */
+  ins_encode( Push_Reg_F(src1),
+              Opc_MemImm_F(src2),
+              Pop_Mem_F(dst));
+  ins_pipe( fpu_mem_reg_con );
+%}
+// 
+// This instruction does not round to 24-bits
+instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF src1 src2));
+  format %{ "FLD    $src1\n\t"
+            "FADD   $src2\n\t"
+            "FSTP_S $dst"  %}
+  opcode(0xD8, 0x00);       /* D8 /0 */
+  ins_encode( Push_Reg_F(src1),
+              Opc_MemImm_F(src2),
+              Pop_Reg_F(dst));
+  ins_pipe( fpu_reg_reg_con );
+%}
+
+// Spill to obtain 24-bit precision
+instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF src1 src2));
+
+  format %{ "FLD    $src1\n\t"
+            "FMUL   $src2\n\t"
+            "FSTP_S $dst"  %}
+  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
+  ins_encode( Push_Reg_F(src1),
+              OpcReg_F(src2),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_reg_reg );
+%}
+// 
+// This instruction does not round to 24-bits
+instruct mulF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF src1 src2));
+
+  format %{ "FLD    $src1\n\t"
+            "FMUL   $src2\n\t"
+            "FSTP_S $dst"  %}
+  opcode(0xD8, 0x1); /* D8 C8+i */
+  ins_encode( Push_Reg_F(src2),
+              OpcReg_F(src1),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_reg_reg );
+%}
+
+
+// Spill to obtain 24-bit precision
+// Cisc-alternate to reg-reg multiply
+instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF src1 (LoadF src2)));
+
+  format %{ "FLDS   $src2\n\t"
+            "FMUL   $src1\n\t"
+            "FSTP_S $dst"  %}
+  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+              OpcReg_F(src1),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_reg_mem );
+%}
+// 
+// This instruction does not round to 24-bits
+// Cisc-alternate to reg-reg multiply
+instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF src1 (LoadF src2)));
+
+  format %{ "FMUL   $dst,$src1,$src2" %}
+  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+              OpcReg_F(src1),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_reg_mem );
+%}
+
+// Spill to obtain 24-bit precision
+instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF src1 src2));
+
+  format %{ "FMUL   $dst,$src1,$src2" %}
+  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
+              set_instruction_start,
+              OpcP, RMopc_Mem(secondary,src1),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_mem_mem );
+%}
+
+// Spill to obtain 24-bit precision
+instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF src1 src2));
+
+  format %{ "FMULc $dst,$src1,$src2" %}
+  opcode(0xD8, 0x1);  /* D8 /1*/
+  ins_encode( Push_Reg_F(src1),
+              Opc_MemImm_F(src2),
+              Pop_Mem_F(dst));
+  ins_pipe( fpu_mem_reg_con );
+%}
+// 
+// This instruction does not round to 24-bits
+instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF src1 src2));
+
+  format %{ "FMULc $dst. $src1, $src2" %}
+  opcode(0xD8, 0x1);  /* D8 /1*/
+  ins_encode( Push_Reg_F(src1),
+              Opc_MemImm_F(src2),
+              Pop_Reg_F(dst));
+  ins_pipe( fpu_reg_reg_con );
+%}
+
+
+// 
+// MACRO1 -- subsume unshared load into mulF
+// This instruction does not round to 24-bits
+instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (MulF (LoadF mem1) src));
+
+  format %{ "FLD    $mem1    ===MACRO1===\n\t"
+            "FMUL   ST,$src\n\t"
+            "FSTP   $dst" %}
+  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
+  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
+              OpcReg_F(src),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_reg_mem );
+%}
+// 
+// MACRO2 -- addF a mulF which subsumed an unshared load
+// This instruction does not round to 24-bits
+instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
+  ins_cost(95);
+
+  format %{ "FLD    $mem1     ===MACRO2===\n\t"
+            "FMUL   ST,$src1  subsume mulF left load\n\t"
+            "FADD   ST,$src2\n\t"
+            "FSTP   $dst" %}
+  opcode(0xD9); /* LoadF D9 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
+              FMul_ST_reg(src1),
+              FAdd_ST_reg(src2),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_mem_reg_reg );
+%}
+ 
+// MACRO3 -- addF a mulF
+// This instruction does not round to 24-bits.  It is a '2-address'
+// instruction in that the result goes back to src2.  This eliminates
+// a move from the macro; possibly the register allocator will have
+// to add it back (and maybe not).
+instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set src2 (AddF (MulF src0 src1) src2));
+
+  format %{ "FLD    $src0     ===MACRO3===\n\t"
+            "FMUL   ST,$src1\n\t"
+            "FADDP  $src2,ST" %}
+  opcode(0xD9); /* LoadF D9 /0 */
+  ins_encode( Push_Reg_F(src0),
+              FMul_ST_reg(src1),
+              FAddP_reg_ST(src2) );
+  ins_pipe( fpu_reg_reg_reg );
+%}
+
+// MACRO4 -- divF subF
+// This instruction does not round to 24-bits
+instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (DivF (SubF src2 src1) src3));
+
+  format %{ "FLD    $src2   ===MACRO4===\n\t"
+            "FSUB   ST,$src1\n\t"
+            "FDIV   ST,$src3\n\t"
+            "FSTP  $dst" %}
+  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
+  ins_encode( Push_Reg_F(src2),
+              subF_divF_encode(src1,src3),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_reg_reg_reg );
+%}
+
+// Spill to obtain 24-bit precision
+instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
+  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (DivF src1 src2));
+
+  format %{ "FDIV   $dst,$src1,$src2" %}
+  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
+  ins_encode( Push_Reg_F(src1),
+              OpcReg_F(src2),
+              Pop_Mem_F(dst) );
+  ins_pipe( fpu_mem_reg_reg );
+%}
+// 
+// This instruction does not round to 24-bits
+instruct divF_reg(regF dst, regF src) %{
+  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (DivF dst src));
+
+  format %{ "FDIV   $dst,$src" %}
+  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
+  ins_encode( Push_Reg_F(src),
+              OpcP, RegOpc(dst) );
+  ins_pipe( fpu_reg_reg );
+%}
+
+
+// Spill to obtain 24-bit precision
+instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI eax, eFlagsReg cr) %{
+  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (ModF src1 src2));
+  effect(KILL eax, KILL cr); // emitModD() uses EAX and EFLAGS
+
+  format %{ "FMOD   $dst,$src1,$src2" %}
+  ins_encode( Push_Reg_Mod_D(src1, src2),
+              emitModD(),
+              Push_Result_Mod_D(src2),
+              Pop_Mem_F(dst));
+  ins_pipe( pipe_slow );
+%}
+// 
+// This instruction does not round to 24-bits
+instruct modF_reg(regF dst, regF src, eAXRegI eax, eFlagsReg cr) %{
+  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (ModF dst src));
+  effect(KILL eax, KILL cr); // emitModD() uses EAX and EFLAGS
+
+  format %{ "FMOD   $dst,$src" %}
+  ins_encode(Push_Reg_Mod_D(dst, src),
+              emitModD(),
+              Push_Result_Mod_D(src),
+              Pop_Reg_F(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI eax, regFPR1 tmp, eFlagsReg cr) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ModF src0 src1));
+  effect(KILL eax, KILL tmp, KILL cr);
+  format %{ "SUB    ESP,4\n"
+          "\tMOVSS  [ESP+0],$src1\n"
+          "\tFPOP\n"
+          "\tFLD_S  [ESP+0]\n"
+          "\tMOVSS  [ESP+0],$src0\n"
+          "\tFLD_S  [ESP+0]\n"
+     "loop:\tFPREM\n"
+          "\tFWAIT\n"
+          "\tFNSTSW AX\n"
+          "\tSAHF\n"
+          "\tJP     loop\n"
+          "\tFSTP_S [ESP+0]\n"
+          "\tMOVSS  $dst,[ESP+0]\n"
+          "\tADD    ESP,4"
+    %}
+  ins_cost(250);
+  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4));
+  ins_pipe( pipe_slow );
+%}
+
+
+//----------Arithmetic Conversion Instructions---------------------------------
+// The conversion operations are all alpha-sorted.  Please keep it that way!
+
+instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
+  predicate(UseSSE==0);
+  match(Set dst (RoundFloat src));
+  ins_cost(125);
+  format %{ "FLD    $src\n\t"
+            "FSTP_S $dst\t# F-round" %}
+  ins_encode( Push_Reg_F(src),
+              Pop_Mem_F(dst));
+  ins_pipe( fpu_mem_reg );
+%}
+
+instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
+  predicate(UseSSE<=1);
+  match(Set dst (RoundDouble src));
+  ins_cost(125);
+  format %{ "FLD    $src\n\t"
+            "FSTP_D $dst\t# D-round" %}
+  ins_encode( Push_Reg_D(src),
+              Pop_Mem_D(dst));
+  ins_pipe( fpu_mem_reg );
+%}
+
+// Force rounding to 24-bit precision and 8-bit exponent
+instruct convD2F_reg(stackSlotF dst, regD src) %{
+  predicate(UseSSE==0);
+  match(Set dst (ConvD2F src));
+  format %{ "D2F    $dst,$src" %}
+  expand %{
+    roundFloat_mem_reg(dst,src);
+  %}
+%}
+
+// Force rounding to 24-bit precision and 8-bit exponent
+instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
+  predicate(UseSSE==1);
+  match(Set dst (ConvD2F src));
+  effect( KILL cr );
+  format %{ "SUB    ESP,4\n\t"
+            "FLD    $src\n\t"
+            "FSTP_S [ESP]\t# F-round\n\t"
+            "MOVSS  $dst,[ESP]\n\t"
+            "ADD ESP,4" %}
+  ins_encode( D2X_encoding(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+// Force rounding double precision to single precision
+instruct convXD2X_reg(regX dst, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set dst (ConvD2F src));
+  format %{ "CVTSD2SS $dst,$src" %}
+  opcode(0xF2, 0x0F, 0x5A); 
+  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
+  predicate(UseSSE==1);
+  match(Set dst (ConvF2D src));
+  effect( KILL cr );
+  format %{ "SUB    ESP,4\n\t"
+            "MOVSS  [ESP] $src\n\t"
+            "FLD    [ESP]\n\t"
+            "ADD    ESP,4\n\t"
+            "FSTP   $dst" %}
+  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convX2XD_reg(regXD dst, regX src) %{
+  predicate(UseSSE==2);
+  match(Set dst (ConvF2D src));
+  format %{ "CVTSS2SD $dst,$src" %}
+  opcode(0xF3, 0x0F, 0x5A); 
+  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
+instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
+  predicate(UseSSE<=1);
+  match(Set dst (ConvD2I src));
+  effect( KILL tmp, KILL cr );
+  format %{ "FLD    $src\t# Convert double to int \n\t"
+            "FLDCW  trunc mode\n\t"
+            "SUB    ESP,4\n\t"
+            "FISTp  [ESP + #0]\n\t"
+            "FLDCW  std/24-bit mode\n\t"
+            "POP    EAX\n\t"
+            "CMP    EAX,0x80000000\n\t"
+            "JNE,s  fast\n\t"   
+            "FLD_D  $src\n\t"
+            "CALL   d2i_wrapper\n"
+      "fast:" %}
+  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
+  ins_pipe( pipe_slow );
+%}
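+
+// On the fast/slow split above: FISTP with the control word set to truncation
+// gives Java's round-toward-zero, and it stores the "integer indefinite"
+// pattern 0x80000000 when the input is a NaN or out of int range.  Comparing
+// EAX against 0x80000000 therefore catches every corner case (plus a genuine
+// Integer.MIN_VALUE result), and d2i_wrapper then applies the JLS rules:
+// NaN -> 0, out-of-range values saturate to Integer.MIN/MAX_VALUE.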
+
+// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
+instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
+  predicate(UseSSE==2);
+  match(Set dst (ConvD2I src));
+  effect( KILL tmp, KILL cr );
+  format %{ "CVTTSD2SI $dst, $src\n\t" 
+            "CMP    $dst,0x80000000\n\t"
+            "JNE,s  fast\n\t"
+            "SUB    ESP, 8\n\t"
+            "MOVSD  [ESP], $src\n\t"
+            "FLD_D  [ESP]\n\t"
+            "ADD    ESP, 8\n\t"
+            "CALL   d2i_wrapper\n\t"
+      "fast:" %}
+  opcode(0x1); // double-precision conversion
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
+  match(Set dst (ConvD2L src));
+  effect( KILL cr );
+  format %{ "FLD    $src\t# Convert double to long\n\t"
+            "FLDCW  trunc mode\n\t"
+            "SUB    ESP,8\n\t"
+            "FISTp  [ESP + #0]\n\t"
+            "FLDCW  std/24-bit mode\n\t"
+            "POP    EAX\n\t"
+            "POP    EDX\n\t"
+            "CMP    EDX,0x80000000\n\t"
+            "JNE,s  fast\n\t"
+            "TEST   EAX,EAX\n\t"
+            "JNE,s  fast\n\t"
+            "FLD    $src\n\t"
+            "CALL   d2l_wrapper\n"
+      "fast:" %}
+  ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
+  ins_pipe( pipe_slow );
+%}
+
+// XMM lacks a float/double->long conversion, so use the old FPU stack.
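+// (On 32-bit x86 there is no SSE2 instruction that produces a 64-bit integer
+//  result; CVTTSD2SI only yields 32 bits here, so the value is spilled to the
+//  stack and converted with x87 FISTp instead.)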
+instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
+  predicate (UseSSE==2);
+  match(Set dst (ConvD2L src));
+  effect( KILL cr );
+  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
+            "MOVSD  [ESP],$src\n\t"
+            "FLD_D  [ESP]\n\t"
+            "FLDCW  trunc mode\n\t"
+            "FISTp  [ESP + #0]\n\t"
+            "FLDCW  std/24-bit mode\n\t"
+            "POP    EAX\n\t"
+            "POP    EDX\n\t"
+            "CMP    EDX,0x80000000\n\t"
+            "JNE,s  fast\n\t"
+            "TEST   EAX,EAX\n\t"
+            "JNE,s  fast\n\t"
+            "SUB    ESP,8\n\t"
+            "MOVSD  [ESP],$src\n\t"
+            "FLD_D  [ESP]\n\t"
+            "CALL   d2l_wrapper\n"
+      "fast:" %}
+  ins_encode( XD2L_encoding(src) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct convF2D_reg(regD dst, regF src) %{
+  predicate(UseSSE==0);
+  match(Set dst (ConvF2D src));
+  format %{ "FLD    $src\n\t"
+            "FSTP   $dst" %}
+  ins_encode(Push_Reg_F(src), Pop_Reg_D(dst));
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Convert a float to an int.  Java semantics require we do complex
+// manglations in the corner cases.  So we set the rounding mode to
+// 'zero', store the darned float down as an int, and reset the
+// rounding mode to 'nearest'.  The hardware stores a flag value down
+// if we would overflow or converted a NAN; we check for this and
+// go the slow path if needed.
+instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
+  predicate(UseSSE==0);
+  match(Set dst (ConvF2I src));
+  effect( KILL tmp, KILL cr );
+  format %{ "FLD    $src\t# Convert float to int \n\t"
+            "FLDCW  trunc mode\n\t"
+            "SUB    ESP,4\n\t"
+            "FISTp  [ESP + #0]\n\t"
+            "FLDCW  std/24-bit mode\n\t"
+            "POP    EAX\n\t"
+            "CMP    EAX,0x80000000\n\t"
+            "JNE,s  fast\n\t"
+            "FLD    $src\n\t"
+            "CALL   d2i_wrapper\n"
+      "fast:" %}
+  // D2I_encoding works for F2I
+  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
+  match(Set dst (ConvF2L src));
+  effect( KILL cr );
+  format %{ "FLD    $src\t# Convert float to long\n\t"
+            "FLDCW  trunc mode\n\t"
+            "SUB    ESP,8\n\t"
+            "FISTp  [ESP + #0]\n\t"
+            "FLDCW  std/24-bit mode\n\t"
+            "POP    EAX\n\t"
+            "POP    EDX\n\t"
+            "CMP    EDX,0x80000000\n\t"
+            "JNE,s  fast\n\t"
+            "TEST   EAX,EAX\n\t"
+            "JNE,s  fast\n\t"
+            "FLD    $src\n\t"
+            "CALL   d2l_wrapper\n"
+      "fast:" %}
+  // D2L_encoding works for F2L
+  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
+  ins_pipe( pipe_slow );
+%}
+
+// XMM lacks a float/double->long conversion, so use the old FPU stack.
+instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
+  predicate (UseSSE>=1);
+  match(Set dst (ConvF2L src));
+  effect( KILL cr );
+  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
+            "MOVSS  [ESP],$src\n\t"
+            "FLD_S  [ESP]\n\t"
+            "FLDCW  trunc mode\n\t"
+            "FISTp  [ESP + #0]\n\t"
+            "FLDCW  std/24-bit mode\n\t"
+            "POP    EAX\n\t"
+            "POP    EDX\n\t"
+            "CMP    EDX,0x80000000\n\t"
+            "JNE,s  fast\n\t"
+            "TEST   EAX,EAX\n\t"
+            "JNE,s  fast\n\t"
+            "SUB    ESP,4\t# Convert float to long\n\t"
+            "MOVSS  [ESP],$src\n\t"
+            "FLD_S  [ESP]\n\t"
+            "ADD    ESP,4\n\t"
+            "CALL   d2l_wrapper\n"
+      "fast:" %}
+  ins_encode( X2L_encoding(src) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2D_reg(regD dst, stackSlotI src) %{
+  predicate( UseSSE<=1 );
+  match(Set dst (ConvI2D src));
+  format %{ "FILD   $src\n\t"
+            "FSTP   $dst" %}
+  opcode(0xDB, 0x0);  /* DB /0 */
+  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+instruct convI2XD_reg(regXD dst, eRegI src) %{
+  predicate( UseSSE==2 );
+  match(Set dst (ConvI2D src));
+  format %{ "CVTSI2SD $dst,$src" %}
+  opcode(0xF2, 0x0F, 0x2A);  
+  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2XD_mem(regXD dst, memory mem) %{
+  predicate( UseSSE==2 );
+  match(Set dst (ConvI2D (LoadI mem)));
+  format %{ "CVTSI2SD $dst,$mem" %}
+  opcode(0xF2, 0x0F, 0x2A);  
+  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2D_mem(regD dst, memory mem) %{
+  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (ConvI2D (LoadI mem)));
+  format %{ "FILD   $mem\n\t"
+            "FSTP   $dst" %}
+  opcode(0xDB);      /* DB /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_D(dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+// Convert a byte to a float; no rounding step needed.
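+// (Every value in 0..255 is exactly representable in a 24-bit significand,
+//  so the usual 24-bit rounding pass would be a no-op.)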
+instruct conv24I2F_reg(regF dst, stackSlotI src) %{
+  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
+  match(Set dst (ConvI2F src));
+  format %{ "FILD   $src\n\t"
+            "FSTP   $dst" %}
+
+  opcode(0xDB, 0x0);  /* DB /0 */
+  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+// In 24-bit mode, force exponent rounding by storing back out
+instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
+  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (ConvI2F src));
+  ins_cost(200);
+  format %{ "FILD   $src\n\t"
+            "FSTP_S $dst" %}
+  opcode(0xDB, 0x0);  /* DB /0 */
+  ins_encode( Push_Mem_I(src),
+              Pop_Mem_F(dst));
+  ins_pipe( fpu_mem_mem );
+%}
+ 
+// In 24-bit mode, force exponent rounding by storing back out
+instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
+  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
+  match(Set dst (ConvI2F (LoadI mem)));
+  ins_cost(200);
+  format %{ "FILD   $mem\n\t"
+            "FSTP_S $dst" %}
+  opcode(0xDB);  /* DB /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),
+              Pop_Mem_F(dst));
+  ins_pipe( fpu_mem_mem );
+%}
+ 
+// This instruction does not round to 24 bits
+instruct convI2F_reg(regF dst, stackSlotI src) %{
+  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (ConvI2F src));
+  format %{ "FILD   $src\n\t"
+            "FSTP   $dst" %}
+  opcode(0xDB, 0x0);  /* DB /0 */
+  ins_encode( Push_Mem_I(src),
+              Pop_Reg_F(dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+// This instruction does not round to 24 bits
+instruct convI2F_mem(regF dst, memory mem) %{
+  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
+  match(Set dst (ConvI2F (LoadI mem)));
+  format %{ "FILD   $mem\n\t"
+            "FSTP   $dst" %}
+  opcode(0xDB);      /* DB /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),
+              Pop_Reg_F(dst));
+  ins_pipe( fpu_reg_mem );
+%}
+
+// Convert an int to a float in xmm; no rounding step needed.
+instruct convI2X_reg(regX dst, eRegI src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvI2F src));
+  format %{ "CVTSI2SS $dst, $src" %}
+
+  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
+  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  ins_pipe( pipe_slow );
+%}
+
+// Convert a float in xmm to an int reg.
+instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvF2I src));
+  effect( KILL tmp, KILL cr );
+  format %{ "CVTTSS2SI $dst, $src\n\t" 
+            "CMP    $dst,0x80000000\n\t"
+            "JNE,s  fast\n\t"
+            "SUB    ESP, 4\n\t"
+            "MOVSS  [ESP], $src\n\t"
+            "FLD    [ESP]\n\t"
+            "ADD    ESP, 4\n\t"
+            "CALL   d2i_wrapper\n\t"
+      "fast:" %}        
+  opcode(0x0); // single-precision conversion
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
+  match(Set dst (ConvI2L src));
+  effect(KILL cr);
+  format %{ "MOV    $dst.lo,$src\n\t"
+            "MOV    $dst.hi,$src\n\t"
+            "SAR    $dst.hi,31" %}
+  ins_encode(convert_int_long(dst,src));
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// Zero-extend convert int to long
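+// This matches the common Java idiom for zero-extending an int into a long,
+// e.g.  long v = intVal & 0xFFFFFFFFL;  which the ideal graph expresses as
+// (AndL (ConvI2L intVal) 0xFFFFFFFF).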
+instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
+  match(Set dst (AndL (ConvI2L src) mask) );
+  effect( KILL flags );
+  format %{ "MOV    $dst.lo,$src\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+  opcode(0x33); // XOR
+  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+instruct convI2L_reg_reg_zex(eRegL dst, eRegI src, eRegL mask, eFlagsReg flags) %{
+  match(Set dst (AndL (ConvI2L src) mask) );
+  predicate(_kids[1]->_leaf->Opcode() == Op_ConL && 
+            _kids[1]->_leaf->is_Type()->type()->is_long()->get_con() == 0xFFFFFFFFl);
+  effect( KILL flags );
+  format %{ "MOV    $dst.lo,$src\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+  opcode(0x33); // XOR
+  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
+  ins_pipe( ialu_reg_reg_long );
+%}  
+
+// Zero-extend long
+instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
+  match(Set dst (AndL src mask) );
+  effect( KILL flags );
+  format %{ "MOV    $dst.lo,$src.lo\n\t"
+            "XOR    $dst.hi,$dst.hi\n\t" %}
+  opcode(0x33); // XOR
+  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
+  predicate (UseSSE<=1);
+  match(Set dst (ConvL2D src));
+  effect( KILL cr );
+  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
+            "PUSH   $src.lo\n\t"
+            "FILD   ST,[ESP + #0]\n\t"
+            "ADD    ESP,8\n\t"
+            "FSTP_D $dst\t# D-round" %}
+  opcode(0xDF, 0x5);  /* DF /5 */
+  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
+  predicate (UseSSE==2);
+  match(Set dst (ConvL2D src));
+  effect( KILL cr );
+  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
+            "PUSH   $src.lo\n\t"
+            "FILD_D [ESP]\n\t"
+            "FSTP_D [ESP]\n\t"
+            "MOVSD  $dst,[ESP]\n\t"
+            "ADD    ESP,8" %}
+  opcode(0xDF, 0x5);  /* DF /5 */
+  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
+  predicate (UseSSE>=1);
+  match(Set dst (ConvL2F src));
+  effect( KILL cr );
+  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
+            "PUSH   $src.lo\n\t"
+            "FILD_D [ESP]\n\t"
+            "FSTP_S [ESP]\n\t"
+            "MOVSS  $dst,[ESP]\n\t"
+            "ADD    ESP,8" %}
+  opcode(0xDF, 0x5);  /* DF /5 */
+  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
+  match(Set dst (ConvL2F src));
+  effect( KILL cr );
+  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
+            "PUSH   $src.lo\n\t"
+            "FILD   ST,[ESP + #0]\n\t"
+            "ADD    ESP,8\n\t"
+            "FSTP_S $dst\t# F-round" %}
+  opcode(0xDF, 0x5);  /* DF /5 */
+  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2I_reg( eRegI dst, eRegL src ) %{
+  match(Set dst (ConvL2I src));
+  effect( DEF dst, USE src );
+  format %{ "MOV    $dst,$src.lo" %}
+  ins_encode(enc_CopyL_Lo(dst,src));
+  ins_pipe( ialu_reg_reg );
+%}
+
+
+instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
+  match(Set dst (MoveF2I src));
+  effect( DEF dst, USE src );
+  ins_cost(125);
+  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
+  opcode(0x8B);
+  ins_encode( OpcP, RegMem(dst,src));
+  ins_pipe( ialu_reg_mem );     
+%}
+
+instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
+  predicate(UseSSE==0);
+  match(Set dst (MoveF2I src));
+  effect( DEF dst, USE src );
+
+  ins_cost(125);
+  format %{ "FLD    $src\n\t"
+            "FSTP_S $dst\t# MoveF2I_reg_stack" %}
+  ins_encode( Push_Reg_F(src),
+              Pop_Mem_F(dst));
+  ins_pipe( fpu_mem_reg );
+%}
+
+instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (MoveF2I src));
+  effect( DEF dst, USE src );
+
+  ins_cost(95);
+  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
+  match(Set dst (MoveI2F src));
+  effect( DEF dst, USE src );
+
+  ins_cost(100);
+  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
+  opcode(0x89);
+  ins_encode( OpcPRegSS( dst, src ) );
+  ins_pipe( ialu_mem_reg );
+%}
+
+
+instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+  predicate(UseSSE==0);
+  match(Set dst (MoveI2F src));
+  effect(DEF dst, USE src);
+
+  ins_cost(125);
+  format %{ "FLD_S  $src\n\t"
+            "FSTP   $dst\t# MoveI2F_stack_reg" %}
+  opcode(0xD9);               /* D9 /0, FLD m32real */
+  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_F(dst) );
+  ins_pipe( fpu_reg_mem );      
+%}
+
+instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (MoveI2F src));
+  effect( DEF dst, USE src );
+
+  ins_cost(145);
+  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
+  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
+  match(Set dst (MoveD2L src));
+  effect(DEF dst, USE src);
+
+  ins_cost(250);
+  format %{ "MOV    $dst.lo,$src\n\t"
+            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
+  opcode(0x8B, 0x8B);
+  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
+  ins_pipe( ialu_mem_long_reg );     
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+  predicate(UseSSE<2);
+  match(Set dst (MoveD2L src));
+  effect(DEF dst, USE src);
+
+  ins_cost(125);
+  format %{ "FLD    $src\n\t"
+            "FSTP_D $dst\t# MoveD2L_reg_stack" %}
+  ins_encode( Push_Reg_D(src),
+              Pop_Mem_D(dst));
+  ins_pipe( fpu_mem_reg );
+%}
+
+instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
+  predicate(UseSSE==2);
+  match(Set dst (MoveD2L src));
+  effect(DEF dst, USE src);
+  ins_cost(145);
+
+  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+  predicate(UseSSE<2);
+  match(Set dst (MoveL2D src));
+  effect(DEF dst, USE src);
+  ins_cost(125);
+
+  format %{ "FLD_D  $src\n\t"
+            "FSTP   $dst\t# MoveL2D_stack_reg" %}
+  opcode(0xDD);               /* DD /0, FLD m64real */
+  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
+              Pop_Reg_D(dst) );
+  ins_pipe( fpu_reg_mem );      
+%}
+
+
+instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
+  predicate(UseSSE==2);
+  match(Set dst (MoveL2D src));
+  effect(DEF dst, USE src);
+
+  ins_cost(145);
+  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
+  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
+  match(Set dst (MoveL2D src));
+  effect(DEF dst, USE src);
+
+  ins_cost(200);
+  format %{ "MOV    $dst,$src.lo\n\t"
+            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
+  opcode(0x89, 0x89);
+  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
+  ins_pipe( ialu_mem_long_reg );
+%}
+
+
+
+// =======================================================================
+// fast clearing of an array
+
+instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, eRegI dummy, eFlagsReg cr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
+  format %{ "SHL    ECX,1\t# Convert doublewords to words\n\t"
+            "XOR    EAX,EAX\n\t"
+            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
+  opcode(0,0x4);
+  ins_encode( Opcode(0xD1), RegOpc(ECX),
+              OpcRegReg(0x33,EAX,EAX),
+              Opcode(0xF3), Opcode(0xAB) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
+  match(Set result (StrComp str1 str2));
+  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
+  //ins_cost(300);
+
+  format %{ "String Compare $str1,$str2 -> $result    // KILL EAX, EBX" %}
+  ins_encode( enc_String_Compare() );
+  ins_pipe( pipe_slow );
+%}
+
+//----------Control Flow Instructions------------------------------------------
+// Signed compare Instructions
+instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
+  match(Set cr (CmpI op1 op2));
+  effect( DEF cr, USE op1, USE op2 );
+  format %{ "CMP    $op1,$op2" %}
+  opcode(0x3B);  /* Opcode 3B /r */
+  ins_encode( OpcP, RegReg( op1, op2) );
+  ins_pipe( ialu_cr_reg_reg );
+%}
+
+instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
+  match(Set cr (CmpI op1 op2));
+  effect( DEF cr, USE op1 );
+  format %{ "CMP    $op1,$op2" %}
+  opcode(0x81,0x07);  /* Opcode 81 /7 */
+  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
+  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+// Cisc-spilled version of cmpI_eReg
+instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
+  match(Set cr (CmpI op1 (LoadI op2)));
+ 
+  format %{ "CMP    $op1,$op2" %}
+  ins_cost(500);
+  opcode(0x3B);  /* Opcode 3B /r */
+  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_pipe( ialu_cr_reg_mem );
+%}
+
+instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
+  match(Set cr (CmpI src zero));
+  effect( DEF cr, USE src );
+
+  format %{ "TEST   $src,$src" %}
+  opcode(0x85);
+  ins_encode( OpcP, RegReg( src, src ) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
+  match(Set cr (CmpI (AndI src con) zero));
+
+  format %{ "TEST   $src,$con" %}
+  opcode(0xF7,0x00);
+  ins_encode( OpcP, RegOpc(src), Con32(con) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
+  match(Set cr (CmpI (AndI src mem) zero));
+
+  format %{ "TEST   $src,$mem" %}
+  opcode(0x85);
+  ins_encode( OpcP, RegMem( src, mem ) );
+  ins_pipe( ialu_cr_reg_mem );
+%}
+
+// Unsigned compare Instructions; really, same as signed except they
+// produce an eFlagsRegU instead of eFlagsReg.
+instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
+  match(Set cr (CmpU op1 op2));
+
+  format %{ "CMPu   $op1,$op2" %}
+  opcode(0x3B);  /* Opcode 3B /r */
+  ins_encode( OpcP, RegReg( op1, op2) );
+  ins_pipe( ialu_cr_reg_reg );
+%}
+
+instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
+  match(Set cr (CmpU op1 op2));
+
+  format %{ "CMPu   $op1,$op2" %}
+  opcode(0x81,0x07);  /* Opcode 81 /7 */
+  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+// Cisc-spilled version of cmpU_eReg
+instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
+  match(Set cr (CmpU op1 (LoadI op2)));
+ 
+  format %{ "CMPu   $op1,$op2" %}
+  ins_cost(500);
+  opcode(0x3B);  /* Opcode 3B /r */
+  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_pipe( ialu_cr_reg_mem );
+%}
+
+// // Cisc-spilled version of cmpU_eReg
+//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
+//  match(Set cr (CmpU (LoadI op1) op2));
+// 
+//  format %{ "CMPu   $op1,$op2" %}
+//  ins_cost(500);
+//  opcode(0x39);  /* Opcode 39 /r */
+//  ins_encode( OpcP, RegMem( op1, op2) );
+//%}
+
+instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
+  match(Set cr (CmpU src zero));
+
+  format %{ "TESTu  $src,$src" %}
+  opcode(0x85);
+  ins_encode( OpcP, RegReg( src, src ) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+// Unsigned pointer compare Instructions
+instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
+  match(Set cr (CmpP op1 op2));
+
+  format %{ "CMPu   $op1,$op2" %}
+  opcode(0x3B);  /* Opcode 3B /r */
+  ins_encode( OpcP, RegReg( op1, op2) );
+  ins_pipe( ialu_cr_reg_reg );
+%}
+
+instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
+  match(Set cr (CmpP op1 op2));
+
+  format %{ "CMPu   $op1,$op2" %}
+  opcode(0x81,0x07);  /* Opcode 81 /7 */
+  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+// Cisc-spilled version of cmpP_eReg
+instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
+  match(Set cr (CmpP op1 (LoadP op2)));
+ 
+  format %{ "CMPu   $op1,$op2" %}
+  ins_cost(500);
+  opcode(0x3B);  /* Opcode 3B /r */
+  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_pipe( ialu_cr_reg_mem );
+%}
+
+// // Cisc-spilled version of cmpP_eReg
+//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
+//  match(Set cr (CmpP (LoadP op1) op2));
+// 
+//  format %{ "CMPu   $op1,$op2" %}
+//  ins_cost(500);
+//  opcode(0x39);  /* Opcode 39 /r */
+//  ins_encode( OpcP, RegMem( op1, op2) );
+//%}
+
+// Compare raw pointer (used in out-of-heap check).
+// Only works because non-oop pointers must be raw pointers
+// and raw pointers have no anti-dependencies.
+instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
+  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
+  match(Set cr (CmpP op1 (LoadP op2)));
+ 
+  format %{ "CMPu   $op1,$op2" %}
+  opcode(0x3B);  /* Opcode 3B /r */
+  ins_encode( OpcP, RegMem( op1, op2) );
+  ins_pipe( ialu_cr_reg_mem );
+%}
+
+//
+// This will generate a signed flags result. This should be ok
+// since any compare to a zero should be eq/neq.
+instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
+  match(Set cr (CmpP src zero));
+
+  format %{ "TEST   $src,$src" %}
+  opcode(0x85);
+  ins_encode( OpcP, RegReg( src, src ) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+// Cisc-spilled version of testP_reg
+// This will generate a signed flags result. This should be ok
+// since any compare to a zero should be eq/neq.
+instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
+  match(Set cr (CmpP (LoadP op) zero));
+ 
+  format %{ "TEST   $op,0xFFFFFFFF" %}
+  ins_cost(500);
+  opcode(0xF7);               /* Opcode F7 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
+  ins_pipe( ialu_cr_reg_imm );
+%}
+
+// Yanked all unsigned pointer compare operations.
+// Pointer compares are done with CmpP which is already unsigned.
+
+//----------Max and Min--------------------------------------------------------
+// Min Instructions
+////
+//   *** Min and Max using the conditional move are slower than the
+//   *** branch version on a Pentium III.
+// // Conditional move for min
+//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//  effect( USE_DEF op2, USE op1, USE cr );
+//  format %{ "CMOVlt $op2,$op1\t! min" %}
+//  opcode(0x4C,0x0F);
+//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
+//  ins_pipe( pipe_cmov_reg );
+//%}
+//
+//// Min Register with Register (P6 version)
+//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//  predicate(VM_Version::supports_cmov() );
+//  match(Set op2 (MinI op1 op2));
+//  ins_cost(200);
+//  expand %{
+//    eFlagsReg cr;
+//    compI_eReg(cr,op1,op2);
+//    cmovI_reg_lt(op2,op1,cr);
+//  %}
+//%}
+
+// Min Register with Register (generic version)
+instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+  match(Set dst (MinI dst src));
+  effect(KILL flags);
+  ins_cost(300);
+
+  format %{ "MIN    $dst,$src" %}
+  opcode(0xCC);
+  ins_encode( min_enc(dst,src) );
+  ins_pipe( pipe_slow );
+%}
+
+// Max Register with Register
+//   *** Min and Max using the conditional move are slower than the
+//   *** branch version on a Pentium III.
+// // Conditional move for max
+//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//  effect( USE_DEF op2, USE op1, USE cr );
+//  format %{ "CMOVgt $op2,$op1\t! max" %}
+//  opcode(0x4F,0x0F);
+//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
+//  ins_pipe( pipe_cmov_reg );
+//%}
+//
+// // Max Register with Register (P6 version)
+//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//  predicate(VM_Version::supports_cmov() );
+//  match(Set op2 (MaxI op1 op2));
+//  ins_cost(200);
+//  expand %{
+//    eFlagsReg cr;
+//    compI_eReg(cr,op1,op2);
+//    cmovI_reg_gt(op2,op1,cr);
+//  %}
+//%}
+
+// Max Register with Register (generic version)
+instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+  match(Set dst (MaxI dst src));
+  effect(KILL flags);
+  ins_cost(300);
+
+  format %{ "MAX    $dst,$src" %}
+  opcode(0xCC);
+  ins_encode( max_enc(dst,src) );
+  ins_pipe( pipe_slow );
+%}
+
+// ============================================================================
+// Branch Instructions
+// Jump Direct - Label defines a relative address from JMP+1
+instruct jmpDir(label labl) %{
+  match(Goto);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "JMP    $labl" %}
+  size(5);
+  opcode(0xE9);
+  ins_encode( OpcP, Lbl( labl ) );
+  ins_pipe( pipe_jmp );
+  ins_pc_relative(1);
+%}
+
+// Jump Direct Conditional - Label defines a relative address from Jcc+1
+instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
+  match(If cop cr);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop    $labl" %}
+  size(6);
+  opcode(0x0F, 0x80);
+  ins_encode( Jcc( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+%}
+
+// Jump Direct Conditional - Label defines a relative address from Jcc+1
+instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
+  match(CountedLoopEnd cop cr);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop    $labl\t# Loop end" %}
+  size(6);
+  opcode(0x0F, 0x80);
+  ins_encode( Jcc( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+%}
+
+// Jump Direct Conditional - Label defines a relative address from Jcc+1
+instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
+  match(CountedLoopEnd cop cmp);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop,u  $labl\t# Loop end" %}
+  size(6);
+  opcode(0x0F, 0x80);
+  ins_encode( Jcc( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+%}
+
+// Jump Direct Conditional - using unsigned comparison
+instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
+  match(If cop cmp);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop,u  $labl" %}
+  size(6);
+  opcode(0x0F, 0x80);
+  ins_encode( Jcc( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+%}
+
+// ============================================================================
+// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
+// array for an instance of the superklass.  Set a hidden internal cache on a
+// hit (cache is checked with exposed code in gen_subtype_check()).  Return
+// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
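+// (The secondary-supers array holds the supertypes that cannot be checked via
+//  the fixed-depth display of primary supers, chiefly interfaces, which is why
+//  a linear scan plus a one-element cache is used here.)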
+instruct partialSubtypeCheck( eDIRegI result, eSIRegP sub, eAXRegP super, eCXRegI ecx, eFlagsReg cr ) %{
+  match(Set result (PartialSubtypeCheck sub super));
+  effect( KILL ecx, KILL cr );
+
+  ins_cost(1000);
+  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
+            "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
+            "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
+            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
+            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
+            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
+            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
+     "miss:\t" %}
+
+  opcode(0x1); // Force a XOR of EDI
+  ins_encode( enc_PartialSubtypeCheck() );
+  ins_pipe( pipe_slow );
+%}
+
+instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI ecx, eDIRegI result ) %{
+  match(Set cr (PartialSubtypeCheck sub super));
+  effect( KILL ecx, KILL result );
+
+  ins_cost(1000);
+  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
+            "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
+            "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
+            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
+            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
+            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
+     "miss:\t" %}
+
+  opcode(0x0);  // No need to XOR EDI
+  ins_encode( enc_PartialSubtypeCheck() );
+  ins_pipe( pipe_slow );
+%}
+
+// ============================================================================
+// Branch Instructions -- short offset versions
+// 
+// These instructions are used to replace jumps of a long offset (the default
+// match) with jumps of a shorter offset.  These instructions are all tagged
+// with the ins_short_branch attribute, which causes the ADLC to suppress the
+// match rules in general matching.  Instead, the ADLC generates a conversion
+// method in the MachNode which can be used to do in-place replacement of the
+// long variant with the shorter variant.  The compiler will determine if a
+// branch can be taken by the is_short_branch_offset() predicate in the machine
+// specific code section of the file.
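+//
+// Note that the short forms encode an 8-bit signed displacement, so they can
+// only reach targets within roughly -128..+127 bytes of the end of the branch;
+// anything farther away keeps the default long (rel32) form.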
+
+// Jump Direct - Label defines a relative address from JMP+1
+instruct jmpDir_short(label labl) %{
+  match(Goto);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "JMP,s  $labl" %}
+  size(2);
+  opcode(0xEB);
+  ins_encode( OpcP, LblShort( labl ) );
+  ins_pipe( pipe_jmp );
+  ins_pc_relative(1);
+  ins_short_branch(1);
+%}
+
+// Jump Direct Conditional - Label defines a relative address from Jcc+1
+instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
+  match(If cop cr);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop,s  $labl" %}
+  size(2);
+  opcode(0x70);
+  ins_encode( JccShort( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+  ins_short_branch(1);
+%}
+
+// Jump Direct Conditional - Label defines a relative address from Jcc+1
+instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
+  match(CountedLoopEnd cop cr);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop,s  $labl" %}
+  size(2);
+  opcode(0x70);
+  ins_encode( JccShort( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+  ins_short_branch(1);
+%}
+
+// Jump Direct Conditional - Label defines a relative address from Jcc+1
+instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
+  match(CountedLoopEnd cop cmp);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop,us $labl" %}
+  size(2);
+  opcode(0x70);
+  ins_encode( JccShort( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+  ins_short_branch(1);
+%}
+
+// Jump Direct Conditional - using unsigned comparison
+instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
+  match(If cop cmp);
+  effect(USE labl);
+
+  ins_cost(300);
+  format %{ "J$cop,us $labl" %}
+  size(2);
+  opcode(0x70);
+  ins_encode( JccShort( cop, labl) );
+  ins_pipe( pipe_jcc );
+  ins_pc_relative(1);
+  ins_short_branch(1);
+%}
+
+// ============================================================================
+// Long Compare
+//
+// Currently we hold longs in 2 registers.  Comparing such values efficiently
+// is tricky.  The flavor of compare used depends on whether we are testing
+// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
+// The GE test is the negated LT test.  The LE test can be had by commuting
+// the operands (yielding a GE test) and then negating; negate again for the
+// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
+// NE test is negated from that.
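+//
+// For example, the reg-reg LT/GE test (cmpL_reg_flags_LTGE below) computes the
+// full 64-bit difference with a borrow chain:
+//   CMP  src1.lo,src2.lo    // low halves set the borrow (CF)
+//   MOV  tmp,src1.hi
+//   SBB  tmp,src2.hi        // high halves minus borrow; SF/OF now describe the
+//                           // whole 64-bit subtraction, so JL/JGE apply directly
+// Only the sign-related flags are meaningful afterwards, which is why these
+// rules are no good for EQ/NE tests.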
+
+// Due to a shortcoming in the ADLC, it mixes up expressions like:
+// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the 
+// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections 
+// are collapsed internally in the ADLC's dfa-gen code.  The match for 
+// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 
+// foo match ends up with the wrong leaf.  One fix is to not match both 
+// reg-reg and reg-zero forms of long-compare.  This is unfortunate because 
+// both forms beat the trinary form of long-compare and both are very useful 
+// on Intel which has so few registers.
+
+// Manifest a CmpL result in an integer register.  Very painful.
+// This is the test to avoid.
+instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
+  match(Set dst (CmpL3 src1 src2));
+  effect( KILL flags );
+  ins_cost(1000);
+  format %{ "XOR    $dst,$dst\n\t"
+            "CMP    $src1.hi,$src2.hi\n\t"
+            "JLT,s  m_one\n\t"
+            "JGT,s  p_one\n\t"
+            "CMP    $src1.lo,$src2.lo\n\t"
+            "JB,s   m_one\n\t"
+            "JEQ,s  done\n"
+    "p_one:\tINC    $dst\n\t"
+            "JMP,s  done\n"
+    "m_one:\tDEC    $dst\n"
+     "done:" %}
+  opcode(0x3B, 0x1B);
+  ins_encode( cmpl3_flag(src1,src2,dst) );
+  ins_pipe( pipe_slow );
+%}
+
+//======
+// Manifest a CmpL result in the normal flags.  Only good for LT or GE
+// compares.  Can be used for LE or GT compares by reversing arguments.
+// NOT GOOD FOR EQ/NE tests.  
+instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
+  match( Set flags (CmpL src zero ));
+  ins_cost(100);
+  format %{ "TEST   $src.hi,$src.hi" %}
+  opcode(0x85);
+  ins_encode( OpcP, RegReg_Hi2( src, src ) );
+  ins_pipe( ialu_cr_reg_reg );
+%}
+
+// Manifest a CmpL result in the normal flags.  Only good for LT or GE
+// compares.  Can be used for LE or GT compares by reversing arguments.
+// NOT GOOD FOR EQ/NE tests.
+instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eSIRegI tmp ) %{
+  match( Set flags (CmpL src1 src2 ));
+  effect( KILL tmp );
+  ins_cost(300);
+  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
+            "MOV    ESI,$src1.hi\n\t"
+            "SBB    ESI,$src2.hi\t! Compute flags for long compare" %}
+  ins_encode( long_cmp_flags2( src1, src2 ) );
+  ins_pipe( ialu_cr_reg_reg );
+%}
+
+// Long compares reg < zero/reg OR reg >= zero/reg.
+// Just a wrapper for a normal branch, plus the predicate test.
+instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
+  match(If cmp flags);
+  effect(USE labl);
+  predicate( _kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge );
+  expand %{
+    jmpCon(cmp,flags,labl);    // JLT or JGE...
+  %}
+%}
+
+// Compare 2 longs and CMOVE longs.
+instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge ));
+  ins_cost(400);
+  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+            "CMOV$cmp $dst.hi,$src.hi" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge ));
+  ins_cost(500);
+  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+            "CMOV$cmp $dst.hi,$src.hi" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+// Compare 2 longs and CMOVE ints.
+instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge ));
+  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge ));
+  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
+  ins_cost(250);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
+  ins_pipe( pipe_cmov_mem );
+%}
+
+// Compare 2 longs and CMOVE ints.
+instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge ));
+  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+// Compare 2 longs and CMOVE doubles
+instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
+  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge );
+  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+// Compare 2 longs and CMOVE doubles
+instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
+  predicate( UseSSE==2 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge );
+  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovXD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
+  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovF_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ge );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovX_regS(cmp,flags,dst,src);
+  %}
+%}
+ 
+//======
+// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.  
+instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eSIRegI tmp ) %{
+  match( Set flags (CmpL src zero ));
+  effect(KILL tmp);
+  ins_cost(200);
+  format %{ "MOV    ESI,$src.lo\n\t"
+            "OR     ESI,$src.hi\t! Long is EQ/NE 0?" %}
+  ins_encode( long_cmp_flags0( src ) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.  
+instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
+  match( Set flags (CmpL src1 src2 ));
+  ins_cost(200+300);
+  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
+            "JNE,s  skip\n\t"
+            "CMP    $src1.hi,$src2.hi\n\t"
+     "skip:\t" %}
+  ins_encode( long_cmp_flags1( src1, src2 ) );
+  ins_pipe( ialu_cr_reg_reg );
+%}
+
+// Long compare reg == zero/reg OR reg != zero/reg
+// Just a wrapper for a normal branch, plus the predicate test.
+instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
+  match(If cmp flags);
+  effect(USE labl);
+  predicate( _kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne );
+  expand %{
+    jmpCon(cmp,flags,labl);    // JEQ or JNE...
+  %}
+%}
+
+// Compare 2 longs and CMOVE longs.
+instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne ));
+  ins_cost(400);
+  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+            "CMOV$cmp $dst.hi,$src.hi" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne ));
+  ins_cost(500);
+  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+            "CMOV$cmp $dst.hi,$src.hi" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+// Compare 2 longs and CMOVE ints.
+instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne ));
+  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne ));
+  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
+  ins_cost(250);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
+  ins_pipe( pipe_cmov_mem );
+%}
+
+// Compare 2 longs and CMOVE ints.
+instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne ));
+  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+// Compare 2 longs and CMOVE doubles
+instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
+  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne );
+  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+// Compare 2 longs and CMOVE doubles
+instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
+  predicate( UseSSE==2 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne );
+  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovXD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
+  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovF_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::ne );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovX_regS(cmp,flags,dst,src);
+  %}
+%}
+ 
+//======
+// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
+// Same as cmpL_reg_flags_LEGT except must negate src
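+// i.e. the flags are those of (0 - src), computed by the XOR/CMP/SBB sequence
+// below, and the branch then uses the commuted condition, since
+// (src <= 0) == (0 >= src) and (src > 0) == (0 < src).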
+instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eSIRegI tmp ) %{
+  match( Set flags (CmpL src zero ));
+  effect( KILL tmp );
+  ins_cost(300);
+  format %{ "XOR    ESI,ESI\t# Long compare for -$src < 0, use commuted test\n\t"
+            "CMP    ESI,$src.lo\n\t"
+            "SBB    ESI,$src.hi\n\t" %}
+  ins_encode( long_cmp_flags3(src) );
+  ins_pipe( ialu_reg_reg_long );
+%}
+
+// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
+// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
+// requires a commuted test to get the same result.  
+instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eSIRegI tmp ) %{
+  match( Set flags (CmpL src1 src2 ));
+  effect( KILL tmp );
+  ins_cost(300);
+  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
+            "MOV    ESI,$src2.hi\n\t"
+            "SBB    ESI,$src1.hi\t! Compute flags for long compare" %}
+  ins_encode( long_cmp_flags2( src2, src1 ) );
+  ins_pipe( ialu_cr_reg_reg );
+%}
+
+// Long compares reg < zero/reg OR reg >= zero/reg.
+// Just a wrapper for a normal branch, plus the predicate test
+instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
+  match(If cmp flags);
+  effect(USE labl);
+  predicate( _kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le );
+  ins_cost(300);
+  expand %{
+    jmpCon(cmp,flags,labl);    // JGT or JLE...
+  %}
+%}
+
+// Compare 2 longs and CMOVE longs.
+instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt ));
+  ins_cost(400);
+  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+            "CMOV$cmp $dst.hi,$src.hi" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
+  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt ));
+  ins_cost(500);
+  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+            "CMOV$cmp $dst.hi,$src.hi+4" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 
+  ins_pipe( pipe_cmov_reg_long );
+%}
+
+// Compare 2 longs and CMOVE ints.
+instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt ));
+  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt ));
+  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
+  ins_cost(250);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
+  ins_pipe( pipe_cmov_mem );
+%}
+
+// Compare 2 longs and CMOVE ptrs.
+instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
+  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt ));
+  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  format %{ "CMOV$cmp $dst,$src" %}
+  opcode(0x0F,0x40);
+  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
+  ins_pipe( pipe_cmov_reg );
+%}
+
+// Compare 2 longs and CMOVE doubles
+instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
+  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt );
+  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+// Compare 2 longs and CMOVE doubles
+instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
+  predicate( UseSSE==2 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt );
+  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovXD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
+  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovF_regS(cmp,flags,dst,src);
+  %}
+%}
+
+
+instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->is_Bool()->_test._test == BoolTest::gt );
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovX_regS(cmp,flags,dst,src);
+  %}
+%}
+
+// ============================================================================
+// inlined locking and unlocking
+
+instruct cmpFastLock( eFlagsReg cr, naxRegP object, naxRegP box, eAXRegI tmp) %{
+  match( Set cr (FastLock object box) );
+  effect( KILL tmp );
+  ins_cost(300);
+  format %{ "FASTLOCK $object, $box, kill EAX" %}
+  ins_encode( Fast_Lock(object,box,tmp) );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+%}
+
+instruct cmpFastUnlock( eFlagsReg cr, nabxRegP object, eAXRegP box, eBXRegP tmp ) %{
+  match( Set cr (FastUnlock object box) );
+  effect( KILL box, KILL tmp );
+  ins_cost(300);
+  format %{ "FASTUNLOCK $object, kills $box, EBX" %}
+  ins_encode( Fast_Unlock(object,box,tmp) );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+%}
+
+// ============================================================================
+// Safepoint Instructions
+instruct safePoint( ) %{
+  match(SafePoint);
+  predicate(!SafepointPolling);
+  format %{ "Safepoint_ " %}
+  opcode(0x90); /* NOP = 0x90 */
+  ins_encode( OpcP, OpcP, safepoint_reloc );
+  ins_pipe( empty );
+%}
+
+instruct safePoint_poll(eFlagsReg cr) %{
+  match(SafePoint);
+  predicate(SafepointPolling);
+  effect(KILL cr);
+
+  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
+  size(6);
+  ins_cost(125);
+  ins_encode( Safepoint_Poll() );
+  ins_pipe( ialu_reg_mem );
+%}
+
+// ============================================================================
+// Procedure Call/Return Instructions
+// Call Java Static Instruction
+// Note: If this code changes, the corresponding ret_addr_offset() and
+//       compute_padding() functions will have to be adjusted.
+instruct CallStaticJavaDirect(method meth) %{
+  match(CallStaticJava);
+  effect(USE meth);
+
+  ins_cost(300);
+  format %{ "CALL,static " %}
+  opcode(0xE8); /* E8 cd */
+  ins_encode( pre_call_FPU,
+              Java_Static_Call( meth ),
+              call_epilog,
+              post_call_FPU );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+  ins_alignment(4);
+%}
+
+// Call Java Dynamic Instruction
+// Note: If this code changes, the corresponding ret_addr_offset() and
+//       compute_padding() functions will have to be adjusted.
+instruct CallDynamicJavaDirect(method meth) %{
+  match(CallDynamicJava);
+  effect(USE meth);
+
+  ins_cost(300);
+  format %{ "MOV    EAX,(oop)-1\n\t"
+            "CALL,dynamic" %}
+  opcode(0xE8); /* E8 cd */
+  ins_encode( pre_call_FPU,
+              Java_Dynamic_Call( meth ),
+              call_epilog,
+              post_call_FPU );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+  ins_alignment(4);
+%}
+
+// Call Compiled Java Instruction
+// Required: Used in converter frame from interpreter to compiler
+instruct CallCompiledJavaDirect( method meth, eBPRegP interp_fp ) %{
+  match(CallCompiledJava);
+  effect(USE meth, KILL interp_fp);
+
+  ins_cost(300);
+  format %{ "CALL    *[EAX+compiled_code_entry_point_offset] // compiled code" %}
+  opcode(0xFF, 0x02); /* FF /2 */
+  ins_encode( Java_Compiled_Call( meth ),
+              FFree_Float_Stack_After_Return );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+%}
+
+// Call Java Interpreter Instruction
+// Required: Used in converter frame from compiled code to interpreter
+// Note: If this code changes, the corresponding ret_addr_offset() and
+//       compute_padding() functions will have to be adjusted.
+instruct CallInterpreterDirect( method meth ) %{
+  match(CallInterpreter);
+  effect(USE meth);
+
+  ins_cost(300);
+  format %{ "CALL,interpreter " %}
+  opcode(0xE8); /* E8 cd */
+  // Use FFREEs to clear entries in float stack
+  ins_encode( FFree_Float_Stack_All,
+              Xor_Reg(EBP),
+              Java_To_Runtime( meth ) );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+  ins_alignment(4);
+%}
+
+// Call Runtime Instruction
+instruct CallRuntimeDirect(method meth) %{
+  match(CallRuntime );
+  effect(USE meth);
+
+  ins_cost(300);
+  format %{ "CALL,runtime " %}
+  opcode(0xE8); /* E8 cd */
+  // Use FFREEs to clear entries in float stack
+  ins_encode( pre_call_FPU,
+              FFree_Float_Stack_All,
+              Java_To_Runtime( meth ),
+              post_call_FPU );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+%}
+
+// Call runtime without safepoint
+instruct CallLeafDirect(method meth) %{
+  match(CallLeaf);
+  effect(USE meth);
+
+  ins_cost(300);
+  format %{ "CALL_LEAF,runtime " %}
+  opcode(0xE8); /* E8 cd */
+  ins_encode( pre_call_FPU,
+              FFree_Float_Stack_All,
+              Java_To_Runtime( meth ),
+              Verify_FPU_For_Leaf, post_call_FPU );
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+%}
+
+instruct CallLeafNoFPDirect(method meth) %{
+  match(CallLeafNoFP);
+  effect(USE meth);
+
+  ins_cost(300);
+  format %{ "CALL_LEAF_NOFP,runtime " %}
+  opcode(0xE8); /* E8 cd */
+  ins_encode(Java_To_Runtime(meth));
+  ins_pipe( pipe_slow );
+  ins_pc_relative(1);
+%}
+
+
+// Return Instruction
+// Remove the return address & jump to it.
+// Notice: We always emit a nop after a ret to make sure there is room 
+// for safepoint patching
+instruct Ret() %{
+  match(Return);
+  format %{ "RET" %}
+  opcode(0xC3);  
+  ins_encode(RetWithNops());
+  ins_pipe( pipe_jmp );
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(eRegP jump_target, eAXRegP method_oop) %{
+  match(TailCall jump_target method_oop );
+  ins_cost(300);
+  format %{ "JMP    $jump_target \t# EAX holds method oop" %}
+  opcode(0xFF, 0x4);  /* Opcode FF /4 */
+  ins_encode( OpcP, RegOpc(jump_target) );
+  ins_pipe( pipe_jmp );
+%}
+
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+instruct tailjmpInd(eRegP jump_target, eAXRegP ex_oop) %{
+  match( TailJump jump_target ex_oop );
+  ins_cost(300);
+  format %{ "POP    EDX\t# pop return address into dummy\n\t"
+            "JMP    $jump_target " %}
+  opcode(0xFF, 0x4);  /* Opcode FF /4 */
+  ins_encode( enc_pop_edx,
+              OpcP, RegOpc(jump_target) );
+  ins_pipe( pipe_jmp );
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler.  No code emitted.
+instruct CreateException( eAXRegP ex_oop )
+%{
+  match(Set ex_oop (CreateEx));
+
+  size(0);
+  // use the following format syntax
+  format %{ "# exception oop is in EAX; no code emitted" %}
+  ins_encode();
+  ins_pipe( empty );
+%}
+
+
+// Rethrow exception: 
+// The exception oop will come in the first argument position.
+// Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException()
+%{
+  match(Rethrow);
+
+  // use the following format syntax
+  format %{ "JMP    rethrow_stub" %}
+  ins_encode(enc_rethrow);
+  ins_pipe( pipe_jmp );
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+// 
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+//  [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+// 
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+// 
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser.  An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+// 
+// ---------CURRENT LIMITATIONS----------------------------------------------
+// 
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == EAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(eRegI dst, eRegI src) %{
+//   match(Set dst (CopyI src));
+// %}
+// 
+// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+//   match(Set dst (AddI dst src));
+//   effect(KILL cr);
+// %}
+// 
+// // Change (inc mov) to lea
+// peephole %{
+//   // increment preceded by register-register move
+//   peepmatch ( incI_eReg movI );
+//   // require that the destination register of the increment 
+//   // match the destination register of the move
+//   peepconstraint ( 0.dst == 1.dst );
+//   // construct a replacement instruction that sets
+//   // the destination to ( move's source register + one )
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+// 
+// Implementation no longer uses movX instructions since 
+// machine-independent system no longer uses CopyX nodes.
+// 
+// peephole %{
+//   peepmatch ( incI_eReg movI );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+// 
+// peephole %{
+//   peepmatch ( decI_eReg movI );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+// 
+// peephole %{
+//   peepmatch ( addI_eReg_imm movI );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+// 
+// peephole %{
+//   peepmatch ( addP_eReg_imm movP );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
+// %}
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, eRegI src) %{
+//   match(Set mem (StoreI mem src));
+// %}
+// 
+// instruct loadI(eRegI dst, memory mem) %{
+//   match(Set dst (LoadI mem));
+// %}
+// 
+//peephole %{
+//  peepmatch ( loadI storeI );
+//  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
+//  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
+//%}
+
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,534 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_nativeInst_mips.cpp.incl"
+
+void NativeInstruction::wrote(int offset) {
+  ICache::invalidate_word(addr_at(offset));
+}
+
+void NativeInstruction::set_long_at(int offset, int i) {
+  address addr = addr_at(offset);
+  *(int*)addr = i;
+  //ICache::invalidate_word(addr);
+}
+
+void NativeCall::verify() {
+/*
+	// Make sure code pattern is actually a call imm32 instruction.
+  int inst = ubyte_at(0);
+  if (inst != instruction_code) {
+    tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", instruction_address(),
+                                                        inst);
+    fatal("not a call disp32");
+  }
+*/
+	// make sure code pattern is actually a call instruction
+	if ( !is_op(Assembler::lui_op) || 
+		!is_op(long_at(4), Assembler::addiu_op) || 
+		!is_special_op(long_at(8), Assembler::jalr_op) ) {
+	 	fatal("not a call");
+	}
+}
+
+static int illegal_instruction_bits = 0;
+
+int NativeInstruction::illegal_instruction() {
+	if (illegal_instruction_bits == 0) {
+		ResourceMark rm;
+		char buf[40];
+		CodeBuffer cbuf((address)&buf[0], 20);     
+		MacroAssembler* a = new MacroAssembler(&cbuf);     
+		address ia = a->pc();     
+		a->brk(11);
+		int bits = *(int*)ia;
+		illegal_instruction_bits = bits;   
+	}
+	return illegal_instruction_bits;
+}
+
+bool NativeInstruction::is_int_branch() {
+	switch(Assembler::opcode(insn_word())) {
+		case Assembler::beq_op:
+		case Assembler::beql_op:
+		case Assembler::bgtz_op:
+		case Assembler::bgtzl_op:
+		case Assembler::blez_op:
+		case Assembler::blezl_op:
+		case Assembler::bne_op:
+		case Assembler::bnel_op:
+			return true;
+		case Assembler::regimm_op:
+			switch(Assembler::rt(insn_word())) {
+				case Assembler::bgez_op:
+				case Assembler::bgezal_op:
+				case Assembler::bgezall_op:
+				case Assembler::bgezl_op:
+				case Assembler::bltz_op:
+				case Assembler::bltzal_op:
+				case Assembler::bltzall_op:
+				case Assembler::bltzl_op:
+					return true;
+			}
+	}
+
+	return false;
+}
+
+bool NativeInstruction::is_float_branch() {
+	if (!is_op(Assembler::cop1_op) || 
+			!is_rs((Register)Assembler::bc_op)) return false;
+
+	switch(Assembler::rt(insn_word())) {
+		case Assembler::bcf_op:
+		case Assembler::bcfl_op:
+		case Assembler::bct_op:
+		case Assembler::bctl_op:
+			return true;
+	}
+
+	return false;
+}
+
+
+
+address NativeCall::destination() const {
+	return (address)Assembler::merge(long_at(4)&0xffff, long_at(0)&0xffff);
+}
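+
+// Illustrative sketch (not part of the original port): destination() assumes
+// Assembler::merge() is the inverse of the split_high()/split_low() pair used
+// when the call was emitted, conceptually:
+//
+//   int lo = long_at(4) & 0xffff;   // imm16 of the addiu
+//   int hi = long_at(0) & 0xffff;   // imm16 of the lui
+//   address target = (address)Assembler::merge(lo, hi);
+//
+// so a target written by set_destination() reads back unchanged here.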
+
+void NativeCall::print() {
+  tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT,
+                instruction_address(), destination());
+}
+
+// Inserts a native call instruction at a given pc
+void NativeCall::insert(address code_pos, address entry) {
+  NativeCall *call = nativeCall_at(code_pos);
+	CodeBuffer cb(call->addr_at(0), instruction_size);
+	MacroAssembler masm(&cb);
+#define __ masm.
+	//__ move (T9, (int)entry);
+	__ lui(T9, Assembler::split_high((int)entry));
+	__ addiu(T9, T9, Assembler::split_low((int)entry));
+	__ jalr ();
+#undef __
+
+	ICache::invalidate_range(call->addr_at(0), instruction_size);
+	//  ICache::invalidate_all();
+}
+
+// MT-safe patching of a call instruction.
+// On x86 this patched the first word to self-branching jmps (a spinlock),
+// then the last byte, and finally replaced the jmps atomically with the first
+// four bytes of the new instruction.  The x86 code is kept below as
+// commented-out reference; the MIPS version is not implemented yet.
+void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) {
+/*
+	assert(Patching_lock->is_locked() ||
+         SafepointSynchronize::is_at_safepoint(), "concurrent code patching");
+  assert (instr_addr != NULL, "illegal address for code patching");
+
+  NativeCall* n_call =  nativeCall_at (instr_addr); // checking that it is a call
+  if (os::is_MP()) {
+    guarantee((intptr_t)instr_addr % BytesPerWord == 0, "must be aligned");
+  }
+
+  // First patch dummy jmp in place
+  unsigned char patch[4];
+  assert(sizeof(patch)==sizeof(jint), "sanity check");
+  patch[0] = 0xEB;       // jmp rel8
+  patch[1] = 0xFE;       // jmp to self
+  patch[2] = 0xEB;
+  patch[3] = 0xFE;
+
+  // First patch dummy jmp in place
+  *(jint*)instr_addr = *(jint *)patch;
+
+  // Invalidate.  Opteron requires a flush after every write.
+  n_call->wrote(0);
+
+  // Patch 4th byte
+  instr_addr[4] = code_buffer[4];
+
+  n_call->wrote(4);
+
+  // Patch bytes 0-3
+  *(jint*)instr_addr = *(jint *)code_buffer;
+
+  n_call->wrote(0);
+
+#ifdef ASSERT
+   // verify patching
+   for ( int i = 0; i < instruction_size; i++) {
+     address ptr = (address)((intptr_t)code_buffer + i);
+     int a_byte = (*ptr) & 0xFF;
+     assert(*((address)((intptr_t)instr_addr + i)) == a_byte, "mt safe patching failed");
+   }
+#endif
+*/
+	Unimplemented();
+}
+
+/*
+// Similar to replace_mt_safe, but just changes the destination.  The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times.  If the displacement field is aligned
+// we can simply rely on atomicity of 32-bit writes to make sure other threads
+// will see no intermediate states.  Otherwise, the first two bytes of the
+// call are guaranteed to be aligned, and can be atomically patched to a
+// self-loop to guard the instruction while we change the other bytes.
+
+// We cannot rely on locks here, since the free-running threads must run at
+// full speed.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+// (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
+void NativeCall::set_destination_mt_safe(address dest) {
+  debug_only(verify());
+  // Make sure patching code is locked.  No two threads can patch at the same
+  // time but one may be executing this code.
+  assert(Patching_lock->is_locked() ||
+         SafepointSynchronize::is_at_safepoint(), "concurrent code patching");
+  // Both C1 and C2 should now be generating code which aligns the patched address
+  // to be within a single cache line except that C1 does not do the alignment on
+  // uniprocessor systems.
+  bool is_aligned = ((uintptr_t)displacement_address() + 0) / cache_line_size ==
+                    ((uintptr_t)displacement_address() + 3) / cache_line_size;
+
+  guarantee(!os::is_MP() || is_aligned, "destination must be aligned");
+
+  if (is_aligned) {
+    // Simple case:  The destination lies within a single cache line.
+    set_destination(dest);
+  } else if ((uintptr_t)instruction_address() / cache_line_size ==
+             ((uintptr_t)instruction_address()+1) / cache_line_size) {
+    // Tricky case:  The instruction prefix lies within a single cache line.
+    intptr_t disp = dest - return_address();
+#ifdef AMD64
+    guarantee(disp == (intptr_t)(jint)disp, "must be 32-bit offset");
+#endif // AMD64
+
+    int call_opcode = instruction_address()[0];
+
+    // First patch dummy jump in place:
+    {
+      u_char patch_jump[2];
+      patch_jump[0] = 0xEB;       // jmp rel8
+      patch_jump[1] = 0xFE;       // jmp to self
+
+      assert(sizeof(patch_jump)==sizeof(short), "sanity check");
+      *(short*)instruction_address() = *(short*)patch_jump;
+    }
+    // Invalidate.  Opteron requires a flush after every write.
+    wrote(0);
+
+    // (Note: We assume any reader which has already started to read
+    // the unpatched call will completely read the whole unpatched call
+    // without seeing the next writes we are about to make.)
+
+    // Next, patch the last three bytes:
+    u_char patch_disp[5];
+    patch_disp[0] = call_opcode;
+    *(int32_t*)&patch_disp[1] = (int32_t)disp;
+    assert(sizeof(patch_disp)==instruction_size, "sanity check");
+    for (int i = sizeof(short); i < instruction_size; i++)
+      instruction_address()[i] = patch_disp[i];
+
+    // Invalidate.  Opteron requires a flush after every write.
+    wrote(sizeof(short));
+
+    // (Note: We assume that any reader which reads the opcode we are
+    // about to repatch will also read the writes we just made.)
+
+    // Finally, overwrite the jump:
+    *(short*)instruction_address() = *(short*)patch_disp;
+    // Invalidate.  Opteron requires a flush after every write.
+    wrote(0);
+
+    debug_only(verify());
+    guarantee(destination() == dest, "patch succeeded");
+  } else {
+    // Impossible:  One or the other must be atomically writable.
+    ShouldNotReachHere();
+  }
+}
+*/
+
+void NativeMovConstReg::verify() {
+  if ( !is_op(Assembler::lui_op) ||
+       !is_op(long_at(4), Assembler::addiu_op) )
+    fatal("not a mov reg, imm32");
+}
+
+
+void NativeMovConstReg::print() {
+  tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
+              	instruction_address(), data());
+}
+
+void NativeMovConstReg::set_data(intptr_t x) {
+	set_long_at(0, (long_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff));
+	set_long_at(4, (long_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff));
+	ICache::invalidate_range(addr_at(0), 8); 
+
+	//  ICache::invalidate_all();
+#ifndef CORE
+	// also store the value into an oop_Relocation cell, if any
+	CodeBlob* nm = CodeCache::find_blob(instruction_address());
+	if (nm != NULL) {
+		RelocIterator iter(nm, instruction_address(), instruction_address() + 1); 
+		oop* oop_addr = NULL;
+		while (iter.next()) {
+			if (iter.type() == relocInfo::oop_type) {
+				oop_Relocation *r = iter.oop_reloc();
+				if (oop_addr == NULL && r->oop_index()!=0) {
+					oop_addr = r->oop_addr();
+					*oop_addr = (oop)x;
+				} else {
+					assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
+					}   
+				}   
+		}   
+	}
+
+#endif
+}
+
+//-------------------------------------------------------------------
+/*
+int NativeMovRegMem::instruction_start() const {
+  int off = 0;
+  u_char instr_0 = ubyte_at(off);
+
+  // First check to see if we have a (prefixed or not) xor
+  if ( instr_0 >= instruction_prefix_wide_lo &&      // 0x40
+       instr_0 <= instruction_prefix_wide_hi) { // 0x4f
+    off++;
+    instr_0 = ubyte_at(off);
+  }
+
+  if (instr_0 == instruction_code_xor) {
+    off += 2;
+    instr_0 = ubyte_at(off);
+  }
+
+  // Now look for the real instruction and the many prefix/size specifiers.
+
+  if (instr_0 == instruction_operandsize_prefix ) {  // 0x66
+    off++; // Not SSE instructions
+    instr_0 = ubyte_at(off);
+  }
+
+  if ( instr_0 == instruction_code_xmm_ss_prefix ||      // 0xf3
+       instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2
+    off++;
+    instr_0 = ubyte_at(off);
+  }
+
+  if ( instr_0 >= instruction_prefix_wide_lo &&      // 0x40
+       instr_0 <= instruction_prefix_wide_hi) { // 0x4f
+    off++;
+    instr_0 = ubyte_at(off);
+  }
+
+
+  if (instr_0 == instruction_extended_prefix ) {  // 0x0f
+    off++;
+  }
+
+  return off;
+}
+*/
+
+
+int NativeMovRegMem::offset() const {
+  if (is_immediate()) 
+    return (short)(long_at(instruction_offset)&0xffff);
+  else 
+    return Assembler::merge(long_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff);
+}
+
+void NativeMovRegMem::set_offset(int x) {
+  if (is_immediate()) {
+    assert(Assembler::is_simm16(x), "just check");
+    set_long_at(0, (long_at(0)&0xffff0000) | (x&0xffff) );
+    if (is_64ldst()) {
+      assert(Assembler::is_simm16(x+4), "just check");
+      set_long_at(4, (long_at(4)&0xffff0000) | ((x+4)&0xffff) );
+    }
+  } else {
+    set_long_at(0, (long_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff));
+    set_long_at(4, (long_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff));
+  }
+  ICache::invalidate_range(addr_at(0), 8);
+}
+
+void NativeMovRegMem::verify() {
+	int offset = 0;
+	
+	if ( Assembler::opcode(long_at(0)) == Assembler::lui_op ) {
+		if ( (Assembler::opcode(long_at(4)) != Assembler::addiu_op) ||
+				(Assembler::opcode(long_at(8)) != Assembler::special_op) || 
+				(Assembler::special(long_at(8)) != Assembler::add_op))
+			fatal ("not a mov [reg+offs], reg instruction");
+		offset += 12;
+	}
+	
+	switch(Assembler::opcode(long_at(offset))) {
+	case Assembler::lb_op:
+	case Assembler::lbu_op:
+	case Assembler::lh_op:
+	case Assembler::lhu_op:
+	case Assembler::lw_op:
+	case Assembler::lwc1_op:
+	case Assembler::sb_op:
+	case Assembler::sh_op:
+	case Assembler::sw_op:
+	case Assembler::swc1_op:
+		break;
+	default:
+		fatal ("not a mov [reg+offs], reg instruction");
+	}
+}
+
+
+void NativeMovRegMem::print() {
+  tty->print_cr("0x%x: mov reg, [reg + %x]", instruction_address(), offset());
+}
+
+
+
+void NativeIllegalInstruction::insert(address code_pos) {
+  CodeBuffer cb(code_pos, instruction_size);
+  MacroAssembler masm(&cb);
+#define __ masm.
+  __ brk(11);
+#undef __
+
+  ICache::invalidate_range(code_pos, instruction_size);
+}
+
+void NativeGeneralJump::verify() {
+  assert(((NativeInstruction *)this)->is_jump() ||
+         ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction");
+}
+
+
+void  NativeGeneralJump::set_jump_destination(address dest) {
+  OrderAccess::fence();
+
+  if (is_short()) {
+    // beq offsets are counted in instruction words, relative to the delay slot
+    int offs = (dest - addr_at(4)) >> 2;
+    assert(Assembler::is_simm16(offs), "change this code");
+    set_long_at(0, (long_at(0) & 0xffff0000) | (offs & 0xffff));
+    ICache::invalidate_range(addr_at(0), 4);
+  } else {
+    set_long_at(0, (long_at(0) & 0xffff0000) | (Assembler::split_high((int)dest) & 0xffff));
+    set_long_at(4, (long_at(4) & 0xffff0000) | (Assembler::split_low((int)dest) & 0xffff));
+    ICache::invalidate_range(addr_at(0), 8);
+  }
+}
+
+// We now use a branch (b) to do this; be careful when using this method.
+// by yjl 9/16/2005
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
+  CodeBuffer cb(code_pos, instruction_size + 4);
+  MacroAssembler masm(&cb);
+#define __ masm. 
+  //__ move (rs, (int)entry);
+  //__ lui(AT, Assembler::split_high((int)entry));
+  //__ addiu(AT, AT, Assembler::split_low((int)entry));
+  //__ jr (AT);
+  __ b(entry);
+  __ delayed()->nop();
+#undef __
+
+  ICache::invalidate_range(code_pos, instruction_size + 4);
+}
+
+
+// MT-safe patching of a long jump instruction.
+// The three instruction words of the new jump are written in reverse order,
+// so the first word (the one a running thread would fetch first) is replaced
+// last.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+	NativeGeneralJump* h_jump =  nativeGeneralJump_at (instr_addr);
+	int i0 = ((int*)code_buffer)[0];
+	int i1 = ((int*)code_buffer)[1];
+	int i2 = ((int*)code_buffer)[2];
+	
+	// beq ZERO, ZERO, -1
+	// 0001 0000 0000 0000 1111 1111 1111 1111
+	//h_jump->set_long_at(0*BytesPerInstWord, 0x1000ffff);
+	h_jump->set_long_at(2*BytesPerInstWord, i2);
+	h_jump->set_long_at(1*BytesPerInstWord, i1);
+	h_jump->set_long_at(0*BytesPerInstWord, i0);
+
+	ICache::invalidate_range(h_jump->addr_at(0), instruction_size);
+
+ //ICache::invalidate_all();
+}
+
+// NOTE: T9 is used as the destination register here; it might be better to derive it from 'entry'. FIXME
+// by yjl 8/30/2005
+void NativeGeneralJump::patch_verified_entry(address entry, address verified_entry, address dest) {
+	unsigned int code_buffer[4];        
+	address tmp = (address)code_buffer;
+	// lui(T9, Assembler::split_high(dest))
+	// 0011 1100 0001 1001 Assembler::split_high(dest)
+	*(unsigned short *)tmp = (unsigned short)Assembler::split_high((int)dest);
+	tmp += 2;
+	*(unsigned short *)tmp = (unsigned short)(0x3c19);
+	tmp += 2;
+	//addiu(T9, T9, Assembler::split_low(dest))
+	*(unsigned short *)tmp = (unsigned short)Assembler::split_low((int)dest);
+	tmp += 2;
+	*(unsigned short *)tmp = (unsigned short)(0x2739);
+	tmp += 2;
+	// jr(T9)
+	*(unsigned int *)tmp = (unsigned int)0x03200008;
+	tmp += 4;
+	// nop
+	*(unsigned int *)tmp = (unsigned int)0;
+
+#ifndef CORE
+	check_verified_entry_alignment(entry, verified_entry);
+#endif /* CORE */
+
+	*(unsigned int *)(verified_entry + 0)  = code_buffer[0];
+	*(unsigned int *)(verified_entry + 4)  = code_buffer[1];
+	*(unsigned int *)(verified_entry + 8)  = code_buffer[2];
+	*(unsigned int *)(verified_entry + 12) = code_buffer[3];
+
+	ICache::invalidate_range(verified_entry, instruction_size + 4);
+
+	//ICache::invalidate_all();
+}
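+
+// Illustrative note (not part of the original port): assuming a little-endian
+// target (as on Loongson), the four words written above decode as
+//   lui   T9, split_high(dest)
+//   addiu T9, T9, split_low(dest)
+//   jr    T9
+//   nop                              // delay slot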
+
+
+bool NativeInstruction::is_dtrace_trap() {
+  //return (*(int32_t*)this & 0xff) == 0xcc;
+	Unimplemented();
+	return false;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,496 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// We have interfaces for the following instructions:
+// - NativeInstruction
+// - - NativeCall
+// - - NativeMovConstReg
+// - - NativeMovConstRegPatching
+// - - NativeMovRegMem
+// - - NativeMovRegMemPatching
+// - - NativeJump
+// - - NativeIllegalOpCode
+// - - NativeGeneralJump
+// - - NativeReturn
+// - - NativeReturnX (return with argument)
+// - - NativePushConst
+// - - NativeTstRegMem
+
+// The base class for different kinds of native instruction abstractions.
+// Provides the primitive operations to manipulate code relative to this.
+
+class NativeInstruction VALUE_OBJ_CLASS_SPEC {
+  friend class Relocation;
+
+ public:
+  enum mips_specific_constants {
+    nop_instruction_code        =    0,	//sll zero, zero, zero
+    nop_instruction_size        =    4
+  };
+
+  bool is_nop()                        { return long_at(0) == nop_instruction_code; }
+  bool is_dtrace_trap();
+  inline bool is_call();
+  inline bool is_illegal();
+  inline bool is_return();
+  inline bool is_jump();
+  inline bool is_cond_jump();
+  inline bool is_safepoint_poll();
+  
+  // MIPS has no instruction to generate an illegal-instruction exception;
+  // we define our own: break 11
+  static int illegal_instruction();
+
+  bool is_int_branch();
+  bool is_float_branch();
+
+
+ protected:
+  address addr_at(int offset) const    { return address(this) + offset; }
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(BytesPerInstWord); }
+  address prev_instruction_address() const	{ return addr_at(-BytesPerInstWord); }
+  
+  s_char sbyte_at(int offset) const    { return *(s_char*) addr_at(offset); }
+  u_char ubyte_at(int offset) const    { return *(u_char*) addr_at(offset); }
+  jint int_at(int offset) const         { return *(jint*) addr_at(offset); }
+  intptr_t ptr_at(int offset) const    { return *(intptr_t*) addr_at(offset); }
+  oop  oop_at (int offset) const       { return *(oop*) addr_at(offset); }
+  int  long_at(int offset) const       { return *(jint*)addr_at(offset); }
+
+
+  void set_char_at(int offset, char c)        { *addr_at(offset) = (u_char)c; wrote(offset); }
+  void set_int_at(int offset, jint  i)        { *(jint*)addr_at(offset) = i;  wrote(offset); }
+  void set_ptr_at (int offset, intptr_t  ptr) { *(intptr_t*) addr_at(offset) = ptr;  wrote(offset); }
+  void set_oop_at (int offset, oop  o)        { *(oop*) addr_at(offset) = o;  wrote(offset); }
+  void set_long_at(int offset, int  i);
+  //void set_jlong_at(int offset, jlong i);
+  //void set_addr_at(int offset, address x);
+
+  int  insn_word() const { return long_at(0); }
+  static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; }
+  bool is_op (Assembler::ops op)     const { return is_op(insn_word(), op); }
+  bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs; }
+  bool is_rs (Register rs)           const { return is_rs(insn_word(), rs); }
+  bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt; }
+  bool is_rt (Register rt)           const { return is_rt(insn_word(), rt); }
+
+  static bool is_special_op (int insn, Assembler::special_ops op) {
+    return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op;
+  }
+  bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); }
+
+  // Hook for instruction-cache invalidation after patching; on MIPS this
+  // invalidates the ICache word at the given offset.
+  void wrote(int offset);
+
+ public:
+
+  // unit test stuff
+  static void test() {}                 // override for testing
+
+  inline friend NativeInstruction* nativeInstruction_at(address address);
+};
+
+inline NativeInstruction* nativeInstruction_at(address address) {
+  NativeInstruction* inst = (NativeInstruction*)address;
+#ifdef ASSERT
+  //inst->verify();
+#endif
+  return inst;
+}
+
+inline NativeCall* nativeCall_at(address address);
+// The NativeCall is an abstraction for accessing/manipulating native call imm32/rel32off
+// instructions (used to manipulate inline caches, primitive & dll calls, etc.).
+// MIPS has no call instruction with a 32-bit immediate.  A call is usually
+// emitted as
+//     lui   rt, imm16
+//     addiu rt, rt, imm16
+//     jalr  rt
+// and we treat these three instructions as a single call instruction.
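+//
+// Illustrative sketch (not part of the original port), assuming split_high()
+// compensates for the sign extension of addiu's immediate and split_low()
+// yields the low 16 bits; a call to a hypothetical target 'entry' is then
+// materialized as
+//     lui   T9, split_high((int)entry)
+//     addiu T9, T9, split_low((int)entry)
+//     jalr  T9
+// which is the sequence NativeCall::insert() emits via the MacroAssembler.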
+class NativeCall: public NativeInstruction {
+ public:
+  enum mips_specific_constants {
+    //instruction_code            = 0xE8,
+    instruction_offset          =    0,
+    instruction_size            =   12,
+    return_address_offset       =   16,
+    displacement_offset         =    0	
+  };
+
+  //enum { cache_line_size = BytesPerWord };  // conservative estimate!
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  { return addr_at(return_address_offset); }
+  address return_address() const            { return addr_at(return_address_offset); }
+  address destination() const;
+  void  set_destination(address dest)       {
+    OrderAccess::fence();
+    set_long_at(0, (long_at(0) & 0xffff0000) | (Assembler::split_high((int)dest) & 0xffff));
+    set_long_at(4, (long_at(4) & 0xffff0000) | (Assembler::split_low((int)dest) & 0xffff));
+    ICache::invalidate_range(addr_at(0), 8);
+  }
+  void  set_destination_mt_safe(address dest) { set_destination(dest);}
+
+  //void  verify_alignment() { assert((intptr_t)addr_at(displacement_offset) % BytesPerInt == 0, "must be aligned"); }
+  void  verify_alignment() {  }
+  void  verify();
+  void  print();
+
+  // Creation
+  inline friend NativeCall* nativeCall_at(address address);
+  inline friend NativeCall* nativeCall_before(address return_address);
+
+  static bool is_call_at(address instr) {
+    //return ((*instr) & 0xFF) == NativeCall::instruction_code;
+		return nativeInstruction_at(instr)->is_call();
+  }
+
+  static bool is_call_before(address return_address) {
+    return is_call_at(return_address - NativeCall::return_address_offset);
+  }
+
+  static bool is_call_to(address instr, address target) {
+    return nativeInstruction_at(instr)->is_call() &&
+      nativeCall_at(instr)->destination() == target;
+  }
+
+  // MT-safe patching of a call instruction.
+  static void insert(address code_pos, address entry);
+
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+};
+
+inline NativeCall* nativeCall_at(address address) {
+  NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset);
+#ifdef ASSERT
+  call->verify();
+#endif
+  return call;
+}
+
+inline NativeCall* nativeCall_before(address return_address) {
+  NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset);
+#ifdef ASSERT
+  call->verify();
+#endif
+  return call;
+}
+
+// An interface for accessing/manipulating native mov reg, imm32 instructions.
+// (used to manipulate inlined 32bit data dll calls, etc.)
+// We use two instructions to implement this:
+//     lui   rd, imm16
+//     addiu rd, rd, imm16
+// see MacroAssembler::move(Register, int)
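+//
+// Illustrative usage sketch (not part of the original port), assuming 'pc'
+// points at such a lui/addiu pair and 'new_value' is a hypothetical
+// replacement constant:
+//
+//   NativeMovConstReg* mov = nativeMovConstReg_at(pc);
+//   intptr_t old_value = mov->data();   // merge the two imm16 halves
+//   mov->set_data(new_value);           // rewrite both halves and flush the ICache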
+class NativeMovConstReg: public NativeInstruction {
+ public:
+  enum mips_specific_constants {
+    instruction_offset      = 0,
+    instruction_size        = 8,
+    next_instruction_offset = 8
+  };
+  
+  int     insn_word() const                 { return long_at(instruction_offset); }
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(next_instruction_offset); }
+  intptr_t data() const                     {	return Assembler::merge(long_at(4)&0xffff, long_at(0)&0xffff); }
+  void    set_data(intptr_t x);
+	
+
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void test() {}
+
+  // Creation
+  inline friend NativeMovConstReg* nativeMovConstReg_at(address address);
+  inline friend NativeMovConstReg* nativeMovConstReg_before(address address);
+};
+
+inline NativeMovConstReg* nativeMovConstReg_at(address address) {
+  NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
+}
+
+inline NativeMovConstReg* nativeMovConstReg_before(address address) {
+  NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
+}
+
+class NativeMovConstRegPatching: public NativeMovConstReg {
+ private:
+    friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) {
+    NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset);
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+};
+
+// An interface for accessing/manipulating native moves of the form:
+// 			lui   AT, split_high(offset)
+// 			addiu AT, split_low(offset)
+// 			add   reg, reg, AT
+// 			lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0
+// 			[lw/sw/lwc1/swc1                    dest, reg, 4]
+// 		or 
+// 			lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset
+// 			[lw/sw/lwc1/swc1                    dest, reg, offset+4]
+//
+// Warning: These routines must be able to handle any instruction sequences
+// that are generated as a result of the load/store byte, word, and long
+// macros.
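+//
+// Illustrative usage sketch (not part of the original port), assuming 'pc'
+// points at one of the sequences above:
+//
+//   NativeMovRegMem* ls = nativeMovRegMem_at(pc);
+//   int disp = ls->offset();                // imm16 for the short form, or the
+//                                           // merged hi/lo halves for the long form
+//   ls->add_offset_in_bytes(BytesPerWord);  // e.g. step the access to the next word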
+
+class NativeMovRegMem: public NativeInstruction {
+ public:
+  enum mips_specific_constants {
+    instruction_offset  = 0,
+    hiword_offset 	= 4,
+    ldst_offset   	= 12,
+    immediate_size	= 4,
+    ldst_size     	= 16
+  };
+
+  //offset is less than 16 bits.
+  bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); }
+  bool is_64ldst() const {
+    if (is_immediate()) {
+      return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) &&
+	     (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize);
+    } else {
+      return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) &&
+	     (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize);
+    }
+  }
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  { 
+    return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0));  
+  }
+/*  // helper
+  int instruction_start() const;
+
+  address instruction_address() const;
+
+  address next_instruction_address() const;
+*/
+
+  int   offset() const;
+
+  void  set_offset(int x);
+
+  void  add_offset_in_bytes(int add_offset)     { set_offset ( ( offset() + add_offset ) ); }
+
+  void verify();
+  void print ();
+
+  // unit test stuff
+  static void test() {}
+
+ private:
+  inline friend NativeMovRegMem* nativeMovRegMem_at (address address);
+};
+
+inline NativeMovRegMem* nativeMovRegMem_at (address address) {
+  NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset);
+#ifdef ASSERT
+  test->verify();
+#endif
+  return test;
+}
+
+class NativeMovRegMemPatching: public NativeMovRegMem {
+ private:
+  friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) {
+    NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset);
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+};
+
+
+// Handles all kinds of jumps on Loongson: long/far, conditional/unconditional.
+// A far jump is
+//     lui   reg, split_high(addr)
+//     addiu reg, reg, split_low(addr)
+//     jr    reg
+// and a short jump is
+//     beq   ZERO, ZERO, offset
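+//
+// Illustrative usage sketch (not part of the original port): is_short() keys
+// off the fixed bits of "beq ZERO, ZERO, offset", so given a jump at 'pc':
+//
+//   NativeGeneralJump* jmp = nativeGeneralJump_at(pc);
+//   address target = jmp->jump_destination();  // pc+4 + imm16*4 for the short
+//                                              // form, merged hi/lo otherwise
+//   jmp->set_jump_destination(target);         // re-patch the jump in place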
+class NativeGeneralJump: public NativeInstruction {
+public:
+  enum mips_specific_constants {
+    instruction_offset 	=    0,
+    beq_opcode         	=    0x10000000,//000100|00000|00000|offset
+    b_mask       	=    0xffff0000,
+    short_size    	=    4,
+    instruction_size   =    12
+  };
+
+  bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; }
+  address instruction_address() const { return addr_at(instruction_offset); }
+  address jump_destination() const {
+    if ( is_short() ) {
+      return addr_at(short_size) + Assembler::imm_off(long_at(instruction_offset)) * 4;
+    }
+    return (address)Assembler::merge(long_at(4)&0xffff, long_at(instruction_offset)&0xffff);
+  }
+
+  void  set_jump_destination(address dest);
+
+	// Creation
+  inline friend NativeGeneralJump* nativeGeneralJump_at(address address);
+
+	// Insertion of native general jump instruction
+  static void insert_unconditional(address code_pos, address entry);
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+  static void check_verified_entry_alignment(address entry, address verified_entry){}
+  static void patch_verified_entry(address entry, address verified_entry, address dest);
+
+  void verify();
+};
+
+inline NativeGeneralJump* nativeGeneralJump_at(address address) {
+  NativeGeneralJump* jump = (NativeGeneralJump*)(address);
+  debug_only(jump->verify();)
+  return jump;
+}
+
+/*class NativePopReg : public NativeInstruction {
+  public:
+  enum Intel_specific_constants {
+  instruction_code            = 0x58,
+  instruction_size            =    1,
+  instruction_offset          =    0,
+  data_offset                 =    1,
+  next_instruction_offset     =    1
+  };
+
+// Insert a pop instruction
+static void insert(address code_pos, Register reg);
+};*/
+
+
+class NativeIllegalInstruction: public NativeInstruction {
+public:
+  enum Intel_specific_constants {
+    instruction_size          =    4,
+    instruction_offset        =    0,
+    next_instruction_offset   =    4
+  };
+
+  // Insert an illegal opcode at the specified address
+  static void insert(address code_pos);
+};
+
+// Return instruction that does not pop values off the stack
+class NativeReturn: public NativeInstruction {
+public:
+  enum Intel_specific_constants {
+    instruction_size          =    4,
+    instruction_offset        =    0,
+    next_instruction_offset   =    4
+  };
+};
+
+
+
+
+class NativeCondJump;
+inline NativeCondJump* nativeCondJump_at(address address);
+class NativeCondJump: public NativeInstruction {
+public:
+  enum mips_specific_constants {
+    instruction_size 	      = 16,
+    instruction_offset        = 12,
+    next_instruction_offset   = 20
+  };
+
+
+  int insn_word() const  { return long_at(instruction_offset); }
+  address instruction_address() const { return addr_at(0); }
+  address next_instruction_address() const { return addr_at(next_instruction_offset); }
+		
+  // Creation
+  inline friend NativeCondJump* nativeCondJump_at(address address);
+
+  address jump_destination()  const {
+    return ::nativeCondJump_at(addr_at(12))->jump_destination();
+  }
+
+  void set_jump_destination(address dest) {
+    ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest);
+  }
+
+};	
+
+inline NativeCondJump* nativeCondJump_at(address address) {
+  NativeCondJump* jump = (NativeCondJump*)(address);
+  return jump;
+}
+
+
+
+inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); }
+
+inline bool NativeInstruction::is_call()    { 
+  return is_op(long_at(0), Assembler::lui_op) &&
+         is_op(long_at(4), Assembler::addiu_op) &&
+         is_special_op(long_at(8), Assembler::jalr_op);
+}
+
+inline bool NativeInstruction::is_return()  { return is_special_op(Assembler::jr_op) && is_rs(RA);}
+
+inline bool NativeInstruction::is_jump() { 
+  return ((long_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ||
+          (is_op(long_at(0), Assembler::lui_op) &&
+          is_op(long_at(4), Assembler::addiu_op) &&
+          is_special_op(long_at(8), Assembler::jr_op) && 
+          !is_rs(long_at(8), RA) ); 
+}
+
+inline bool NativeInstruction::is_cond_jump()    { return is_int_branch() || is_float_branch(); }
+
+// On MIPS a safepoint poll takes two instructions, but we do not want to
+// bother checking both; instead the second instruction is the load through AT,
+// and only that one is checked here.
+// change ZERO -> AT, only in godson-2e @jerome, 11/25/2006
+inline bool NativeInstruction::is_safepoint_poll() {
+  return 
+         is_op(long_at(-4), Assembler::lui_op) && 
+         is_rt(long_at(-4), AT) && 
+         is_op(Assembler::lw_op) && 
+         is_rt(AT);
+}
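+
+// Illustrative sketch (not part of the original port): the poll sequence this
+// predicate expects is assumed to look like
+//
+//   lui  AT, split_high(polling_page)      // long_at(-4): lui with rt == AT
+//   lw   AT, split_low(polling_page)(AT)   // current word: lw with rt == AT
+//
+// so is_safepoint_poll() is meant to be applied to the address of the lw,
+// i.e. the faulting instruction seen by the signal handler.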
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/nmethod_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,16 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)nmethod_mips.hpp	1.11 03/12/23 16:36:23 JVM"
+#endif
+/*
+ * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+
+  // machine-dependent parts of class nmethod
+
+  public:
+
+  // Since we only patch a call at the return point of a frame
+  // we must find all live activations and evict them.
+  static bool evict_all_threads_at_deopt() { return true; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,42 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// machine-dependent implementation for register maps
+  friend class frame;
+
+ private:
+#ifndef CORE
+  // This is the hook for finding a register in a "well-known" location,
+  // such as a register block of a predetermined format.
+  // Since there is none, we just return NULL.
+  // See registerMap_sparc.hpp for an example of grabbing registers
+  // from register save areas of a standard layout.
+   address pd_location(VMReg reg) const {return NULL;}
+#endif
+
+  // no PD state to clear or copy:
+  void pd_clear() {}
+  void pd_initialize() {}
+  void pd_initialize_from(const RegisterMap* map) {}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2002-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_register_definitions_mips.cpp.incl"
+
+REGISTER_DEFINITION(Register, noreg);
+REGISTER_DEFINITION(Register, i0);
+REGISTER_DEFINITION(Register, i1);
+REGISTER_DEFINITION(Register, i2);
+REGISTER_DEFINITION(Register, i3);
+REGISTER_DEFINITION(Register, i4);
+REGISTER_DEFINITION(Register, i5);
+REGISTER_DEFINITION(Register, i6);
+REGISTER_DEFINITION(Register, i7);
+REGISTER_DEFINITION(Register, i8);
+REGISTER_DEFINITION(Register, i9);
+REGISTER_DEFINITION(Register, i10);
+REGISTER_DEFINITION(Register, i11);
+REGISTER_DEFINITION(Register, i12);
+REGISTER_DEFINITION(Register, i13);
+REGISTER_DEFINITION(Register, i14);
+REGISTER_DEFINITION(Register, i15);
+REGISTER_DEFINITION(Register, i16);
+REGISTER_DEFINITION(Register, i17);
+REGISTER_DEFINITION(Register, i18);
+REGISTER_DEFINITION(Register, i19);
+REGISTER_DEFINITION(Register, i20);
+REGISTER_DEFINITION(Register, i21);
+REGISTER_DEFINITION(Register, i22);
+REGISTER_DEFINITION(Register, i23);
+REGISTER_DEFINITION(Register, i24);
+REGISTER_DEFINITION(Register, i25);
+REGISTER_DEFINITION(Register, i26);
+REGISTER_DEFINITION(Register, i27);
+REGISTER_DEFINITION(Register, i28);
+REGISTER_DEFINITION(Register, i29);
+REGISTER_DEFINITION(Register, i30);
+REGISTER_DEFINITION(Register, i31);
+
+REGISTER_DEFINITION(FloatRegister, fnoreg);
+REGISTER_DEFINITION(FloatRegister, f0);
+REGISTER_DEFINITION(FloatRegister, f1);
+REGISTER_DEFINITION(FloatRegister, f2);
+REGISTER_DEFINITION(FloatRegister, f3);
+REGISTER_DEFINITION(FloatRegister, f4);
+REGISTER_DEFINITION(FloatRegister, f5);
+REGISTER_DEFINITION(FloatRegister, f6);
+REGISTER_DEFINITION(FloatRegister, f7);
+REGISTER_DEFINITION(FloatRegister, f8);
+REGISTER_DEFINITION(FloatRegister, f9);
+REGISTER_DEFINITION(FloatRegister, f10);
+REGISTER_DEFINITION(FloatRegister, f11);
+REGISTER_DEFINITION(FloatRegister, f12);
+REGISTER_DEFINITION(FloatRegister, f13);
+REGISTER_DEFINITION(FloatRegister, f14);
+REGISTER_DEFINITION(FloatRegister, f15);
+REGISTER_DEFINITION(FloatRegister, f16);
+REGISTER_DEFINITION(FloatRegister, f17);
+REGISTER_DEFINITION(FloatRegister, f18);
+REGISTER_DEFINITION(FloatRegister, f19);
+REGISTER_DEFINITION(FloatRegister, f20);
+REGISTER_DEFINITION(FloatRegister, f21);
+REGISTER_DEFINITION(FloatRegister, f22);
+REGISTER_DEFINITION(FloatRegister, f23);
+REGISTER_DEFINITION(FloatRegister, f24);
+REGISTER_DEFINITION(FloatRegister, f25);
+REGISTER_DEFINITION(FloatRegister, f26);
+REGISTER_DEFINITION(FloatRegister, f27);
+REGISTER_DEFINITION(FloatRegister, f28);
+REGISTER_DEFINITION(FloatRegister, f29);
+REGISTER_DEFINITION(FloatRegister, f30);
+REGISTER_DEFINITION(FloatRegister, f31);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/register_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_register_mips.cpp.incl"
+
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers;
+const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
+                                                                 FloatRegisterImpl::number_of_registers;
+//const int ConcreteRegisterImpl::max_fpr = FloatRegisterImpl::number_of_registers; //aoqi:which?
+
+const char* RegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3",
+    "t0",   "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+    "s0",   "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+    "t8",   "t9", "k0", "k1", "gp", "sp", "fp", "ra"
+  };
+  return is_valid() ? names[encoding()] : "noreg";
+}
+
+const char* FloatRegisterImpl::name() const {
+  const char* names[number_of_registers] = {
+    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+    "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
+    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+    "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+  };
+  return is_valid() ? names[encoding()] : "fnoreg";
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/register_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class VMRegImpl;
+typedef VMRegImpl* VMReg;
+
+// Use Register as shortcut
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+
+// The implementation of integer registers for the MIPS architecture
+inline Register as_Register(int encoding) {
+  return (Register)(intptr_t) encoding;
+}
+
+class RegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    integer_arg_base    = 4,
+    number_of_registers = 32
+  };
+
+  // derived registers, offsets, and addresses
+  Register successor() const                          { return as_Register(encoding() + 1); }
+
+  // construction
+  inline friend Register as_Register(int encoding);
+
+  VMReg as_VMReg();
+
+  // accessors
+  int   encoding() const                         { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+  bool  is_valid() const                         { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+  const char* name() const;
+};
+
+
+// The integer registers of the MIPS32 architecture
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+
+
+CONSTANT_REGISTER_DECLARATION(Register, i0,    (0));
+CONSTANT_REGISTER_DECLARATION(Register, i1,    (1));
+CONSTANT_REGISTER_DECLARATION(Register, i2,    (2));
+CONSTANT_REGISTER_DECLARATION(Register, i3,    (3));
+CONSTANT_REGISTER_DECLARATION(Register, i4,    (4));
+CONSTANT_REGISTER_DECLARATION(Register, i5,    (5));
+CONSTANT_REGISTER_DECLARATION(Register, i6,    (6));
+CONSTANT_REGISTER_DECLARATION(Register, i7,    (7));
+CONSTANT_REGISTER_DECLARATION(Register, i8,    (8));
+CONSTANT_REGISTER_DECLARATION(Register, i9,    (9));
+CONSTANT_REGISTER_DECLARATION(Register, i10,   (10));
+CONSTANT_REGISTER_DECLARATION(Register, i11,   (11));
+CONSTANT_REGISTER_DECLARATION(Register, i12,   (12));
+CONSTANT_REGISTER_DECLARATION(Register, i13,   (13));
+CONSTANT_REGISTER_DECLARATION(Register, i14,   (14));
+CONSTANT_REGISTER_DECLARATION(Register, i15,   (15));
+CONSTANT_REGISTER_DECLARATION(Register, i16,   (16));
+CONSTANT_REGISTER_DECLARATION(Register, i17,   (17));
+CONSTANT_REGISTER_DECLARATION(Register, i18,   (18));
+CONSTANT_REGISTER_DECLARATION(Register, i19,   (19));
+CONSTANT_REGISTER_DECLARATION(Register, i20,   (20));
+CONSTANT_REGISTER_DECLARATION(Register, i21,   (21));
+CONSTANT_REGISTER_DECLARATION(Register, i22,   (22));
+CONSTANT_REGISTER_DECLARATION(Register, i23,   (23));
+CONSTANT_REGISTER_DECLARATION(Register, i24,   (24));
+CONSTANT_REGISTER_DECLARATION(Register, i25,   (25));
+CONSTANT_REGISTER_DECLARATION(Register, i26,   (26));
+CONSTANT_REGISTER_DECLARATION(Register, i27,   (27));
+CONSTANT_REGISTER_DECLARATION(Register, i28,   (28));
+CONSTANT_REGISTER_DECLARATION(Register, i29,   (29));
+CONSTANT_REGISTER_DECLARATION(Register, i30,   (30));
+CONSTANT_REGISTER_DECLARATION(Register, i31,   (31));
+
+//o32 convention registers
+/*CONSTANT_REGISTER_DECLARATION(Register, zero  , ( 0));
+CONSTANT_REGISTER_DECLARATION(Register, at  , ( 1));
+CONSTANT_REGISTER_DECLARATION(Register, v0  , ( 2));
+CONSTANT_REGISTER_DECLARATION(Register, v1  , ( 3));
+CONSTANT_REGISTER_DECLARATION(Register, a0  , ( 4));
+CONSTANT_REGISTER_DECLARATION(Register, a1  , ( 5));
+CONSTANT_REGISTER_DECLARATION(Register, a2  , ( 6));
+CONSTANT_REGISTER_DECLARATION(Register, a3  , ( 7));
+CONSTANT_REGISTER_DECLARATION(Register, t0  , ( 8));
+CONSTANT_REGISTER_DECLARATION(Register, t1  , ( 9));
+CONSTANT_REGISTER_DECLARATION(Register, t2  , ( 10));
+CONSTANT_REGISTER_DECLARATION(Register, t3  , ( 11));
+CONSTANT_REGISTER_DECLARATION(Register, t4  , ( 12));
+CONSTANT_REGISTER_DECLARATION(Register, t5  , ( 13));
+CONSTANT_REGISTER_DECLARATION(Register, t6  , ( 14));
+CONSTANT_REGISTER_DECLARATION(Register, t7  , ( 15));
+CONSTANT_REGISTER_DECLARATION(Register, s0  , ( 16));
+CONSTANT_REGISTER_DECLARATION(Register, s1  , ( 17));
+CONSTANT_REGISTER_DECLARATION(Register, s2  , ( 18));
+CONSTANT_REGISTER_DECLARATION(Register, s3  , ( 19));
+CONSTANT_REGISTER_DECLARATION(Register, s4  , ( 20));
+CONSTANT_REGISTER_DECLARATION(Register, s5  , ( 21));
+CONSTANT_REGISTER_DECLARATION(Register, s6  , ( 22));
+CONSTANT_REGISTER_DECLARATION(Register, s7  , ( 23));
+CONSTANT_REGISTER_DECLARATION(Register, t8  , ( 24));
+CONSTANT_REGISTER_DECLARATION(Register, t9  , ( 25));
+CONSTANT_REGISTER_DECLARATION(Register, k0  , ( 26));
+CONSTANT_REGISTER_DECLARATION(Register, k1  , ( 27));
+CONSTANT_REGISTER_DECLARATION(Register, gp  , ( 28));
+CONSTANT_REGISTER_DECLARATION(Register, sp  , ( 29));
+CONSTANT_REGISTER_DECLARATION(Register, fp  , ( 30));
+CONSTANT_REGISTER_DECLARATION(Register, s8  , ( 30));
+CONSTANT_REGISTER_DECLARATION(Register, ra  , ( 31));*/
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define NOREG ((Register)(noreg_RegisterEnumValue))
+
+#define I0 ((Register)(i0_RegisterEnumValue))
+#define I1 ((Register)(i1_RegisterEnumValue))
+#define I2 ((Register)(i2_RegisterEnumValue))
+#define I3 ((Register)(i3_RegisterEnumValue))
+#define I4 ((Register)(i4_RegisterEnumValue))
+#define I5 ((Register)(i5_RegisterEnumValue))
+#define I6 ((Register)(i6_RegisterEnumValue))
+#define I7 ((Register)(i7_RegisterEnumValue))
+#define I8 ((Register)(i8_RegisterEnumValue))
+#define I9 ((Register)(i9_RegisterEnumValue))
+#define I10 ((Register)(i10_RegisterEnumValue))
+#define I11 ((Register)(i11_RegisterEnumValue))
+#define I12 ((Register)(i12_RegisterEnumValue))
+#define I13 ((Register)(i13_RegisterEnumValue))
+#define I14 ((Register)(i14_RegisterEnumValue))
+#define I15 ((Register)(i15_RegisterEnumValue))
+#define I16 ((Register)(i16_RegisterEnumValue))
+#define I17 ((Register)(i17_RegisterEnumValue))
+#define I18 ((Register)(i18_RegisterEnumValue))
+#define I19 ((Register)(i19_RegisterEnumValue))
+#define I20 ((Register)(i20_RegisterEnumValue))
+#define I21 ((Register)(i21_RegisterEnumValue))
+#define I22 ((Register)(i22_RegisterEnumValue))
+#define I23 ((Register)(i23_RegisterEnumValue))
+#define I24 ((Register)(i24_RegisterEnumValue))
+#define I25 ((Register)(i25_RegisterEnumValue))
+#define I26 ((Register)(i26_RegisterEnumValue))
+#define I27 ((Register)(i27_RegisterEnumValue))
+#define I28 ((Register)(i28_RegisterEnumValue))
+#define I29 ((Register)(i29_RegisterEnumValue))
+#define I30 ((Register)(i30_RegisterEnumValue))
+#define I31 ((Register)(i31_RegisterEnumValue))
+
+#define ZERO ((Register)(i0_RegisterEnumValue))
+#define AT ((Register)(i1_RegisterEnumValue))
+#define V0 ((Register)(i2_RegisterEnumValue))
+#define V1 ((Register)(i3_RegisterEnumValue))
+#define A0 ((Register)(i4_RegisterEnumValue))
+#define A1 ((Register)(i5_RegisterEnumValue))
+#define A2 ((Register)(i6_RegisterEnumValue))
+#define A3 ((Register)(i7_RegisterEnumValue))
+#define T0 ((Register)(i8_RegisterEnumValue))
+#define T1 ((Register)(i9_RegisterEnumValue))
+#define T2 ((Register)(i10_RegisterEnumValue))
+#define T3 ((Register)(i11_RegisterEnumValue))
+#define T4 ((Register)(i12_RegisterEnumValue))
+#define T5 ((Register)(i13_RegisterEnumValue))
+#define T6 ((Register)(i14_RegisterEnumValue))
+#define T7 ((Register)(i15_RegisterEnumValue))
+#define S0 ((Register)(i16_RegisterEnumValue))
+#define S1 ((Register)(i17_RegisterEnumValue))
+#define S2 ((Register)(i18_RegisterEnumValue))
+#define S3 ((Register)(i19_RegisterEnumValue))
+#define S4 ((Register)(i20_RegisterEnumValue))
+#define S5 ((Register)(i21_RegisterEnumValue))
+#define S6 ((Register)(i22_RegisterEnumValue))
+#define S7 ((Register)(i23_RegisterEnumValue))
+#define T8 ((Register)(i24_RegisterEnumValue))
+#define T9 ((Register)(i25_RegisterEnumValue))
+#define K0 ((Register)(i26_RegisterEnumValue))
+#define K1 ((Register)(i27_RegisterEnumValue))
+#define GP ((Register)(i28_RegisterEnumValue))
+#define SP ((Register)(i29_RegisterEnumValue))
+#define FP ((Register)(i30_RegisterEnumValue))
+#define S8 ((Register)(i30_RegisterEnumValue))
+#define RA ((Register)(i31_RegisterEnumValue))
+
+//for interpreter frame
+// bytecode pointer register
+#define BCP						S0
+// local variable pointer register
+#define LVP						S7
+// temporary callee-saved register; we use it to preserve values that may be clobbered across call_VM
+// be sure to save and restore its value in call_stub
+#define TSR						S2
+
+#define TREG					S6
+
+#define FSR						V0
+#define SSR						V1
+#define FSF						F0
+#define SSF						F1
+#define FTF						F14
+#define STF						F15
+
+#define AFT						F30
+
+#define RECEIVER			T0
+#define IC_Klass			T1
+
+#define SHIFT_count		T3
+						
+#endif // DONT_USE_REGISTER_DEFINES
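+
+// For reference: the interpreter-state aliases above (BCP == S0/$16, TSR == S2/$18,
+// TREG == S6/$22, LVP == S7/$23) all live in o32 callee-saved registers, so they
+// survive calls into the runtime; FSR/SSR alias the caller-saved result registers
+// V0/$2 and V1/$3, while RECEIVER, IC_Klass and SHIFT_count use the caller-saved
+// temporaries T0, T1 and T3.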
+
+// Use FloatRegister as shortcut
+class FloatRegisterImpl;
+typedef FloatRegisterImpl* FloatRegister;
+
+inline FloatRegister as_FloatRegister(int encoding) {
+  return (FloatRegister)(intptr_t) encoding;
+}
+
+// The implementation of floating point registers for the MIPS architecture
+class FloatRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+		float_arg_base      = 12,
+    number_of_registers = 32
+  };
+
+  // construction
+  inline friend FloatRegister as_FloatRegister(int encoding);
+
+  VMReg as_VMReg();
+
+  // derived registers, offsets, and addresses
+  FloatRegister successor() const                          { return as_FloatRegister(encoding() + 1); }
+
+  // accessors
+  int   encoding() const                          { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+  bool  is_valid() const                          { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+  const char* name() const;
+
+};
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1));
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0     , ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1     , ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2     , ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3     , ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4     , ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5     , ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6     , ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7     , ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8     , ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9     , ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10    , (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11    , (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12    , (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13    , (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14    , (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15    , (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16    , (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17    , (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18    , (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19    , (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20    , (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21    , (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22    , (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23    , (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24    , (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25    , (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26    , (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27    , (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28    , (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29    , (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30    , (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31    , (31));
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue))
+#define F0     ((FloatRegister)(    f0_FloatRegisterEnumValue))
+#define F1     ((FloatRegister)(    f1_FloatRegisterEnumValue))
+#define F2     ((FloatRegister)(    f2_FloatRegisterEnumValue))
+#define F3     ((FloatRegister)(    f3_FloatRegisterEnumValue))
+#define F4     ((FloatRegister)(    f4_FloatRegisterEnumValue))
+#define F5     ((FloatRegister)(    f5_FloatRegisterEnumValue))
+#define F6     ((FloatRegister)(    f6_FloatRegisterEnumValue))
+#define F7     ((FloatRegister)(    f7_FloatRegisterEnumValue))
+#define F8     ((FloatRegister)(    f8_FloatRegisterEnumValue))
+#define F9     ((FloatRegister)(    f9_FloatRegisterEnumValue))
+#define F10    ((FloatRegister)(   f10_FloatRegisterEnumValue))
+#define F11    ((FloatRegister)(   f11_FloatRegisterEnumValue))
+#define F12    ((FloatRegister)(   f12_FloatRegisterEnumValue))
+#define F13    ((FloatRegister)(   f13_FloatRegisterEnumValue))
+#define F14    ((FloatRegister)(   f14_FloatRegisterEnumValue))
+#define F15    ((FloatRegister)(   f15_FloatRegisterEnumValue))
+#define F16    ((FloatRegister)(   f16_FloatRegisterEnumValue))
+#define F17    ((FloatRegister)(   f17_FloatRegisterEnumValue))
+#define F18    ((FloatRegister)(   f18_FloatRegisterEnumValue))
+#define F19    ((FloatRegister)(   f19_FloatRegisterEnumValue))
+#define F20    ((FloatRegister)(   f20_FloatRegisterEnumValue))
+#define F21    ((FloatRegister)(   f21_FloatRegisterEnumValue))
+#define F22    ((FloatRegister)(   f22_FloatRegisterEnumValue))
+#define F23    ((FloatRegister)(   f23_FloatRegisterEnumValue))
+#define F24    ((FloatRegister)(   f24_FloatRegisterEnumValue))
+#define F25    ((FloatRegister)(   f25_FloatRegisterEnumValue))
+#define F26    ((FloatRegister)(   f26_FloatRegisterEnumValue))
+#define F27    ((FloatRegister)(   f27_FloatRegisterEnumValue))
+#define F28    ((FloatRegister)(   f28_FloatRegisterEnumValue))
+#define F29    ((FloatRegister)(   f29_FloatRegisterEnumValue))
+#define F30    ((FloatRegister)(   f30_FloatRegisterEnumValue))
+#define F31    ((FloatRegister)(   f31_FloatRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+
+const int MIPS_ARGS_IN_REGS_NUM = 4;
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+  // A big enough number for C2: all the registers plus flags
+  // This number must be large enough to cover REG_COUNT (defined by c2) registers.
+  // There is no requirement that any ordering here matches any ordering c2 gives
+  // its optoregs.
+    number_of_registers = RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers 
+  };
+
+  static const int max_gpr;
+  static const int max_fpr;
+
+
+};
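+
+// Assuming RegisterImpl::number_of_registers is 32 (the i0..i31 general registers
+// declared above), ConcreteRegisterImpl therefore advertises 32 + 32 == 64
+// registers in total to the shared register-allocation code.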
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,140 @@
+/*
+ * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_relocInfo_mips.cpp.incl"
+
+
+void Relocation::pd_set_data_value(address x, intptr_t o) {
+  //*pd_address_in_code() = x + o;
+	nativeMovConstReg_at(addr())->set_data((int)(x+o));
+}
+
+
+// NOTE: this call relocation is largely unneeded on MIPS, since the calls here use
+// absolute targets; we may be able to drop call relocation entirely.
+address Relocation::pd_call_destination(address orig_addr) {
+  intptr_t adj = 0;
+  NativeInstruction* ni = nativeInstruction_at(addr());
+  if (ni->is_call()) {
+    return nativeCall_at(addr())->destination() + adj;
+  } else if (ni->is_jump()) {
+    //return nativeJump_at(addr())->jump_destination() + adj;
+		return nativeGeneralJump_at(addr())->jump_destination() + adj;
+  } else if (ni->is_cond_jump()) {
+		return nativeCondJump_at(addr())->jump_destination() +adj;
+  } else {
+    ShouldNotReachHere();
+    return NULL;
+  }
+}
+
+
+void Relocation::pd_set_call_destination(address x) {
+  NativeInstruction* ni = nativeInstruction_at(addr());
+  if (ni->is_call()) {
+    nativeCall_at(addr())->set_destination(x);
+  } else if (ni->is_jump()) {
+    //NativeJump* nj = nativeJump_at(addr());
+    nativeGeneralJump_at(addr())->set_jump_destination(x);
+  } else if (ni->is_cond_jump()) {
+    nativeCondJump_at(addr())->set_jump_destination(x);
+  } else {
+    ShouldNotReachHere();
+  }
+
+    // Unresolved jumps are recognized by a destination of -1
+    // However 64bit can't actually produce such an address
+    // and encodes a jump to self but jump_destination will
+    // return a -1 as the signal. We must not relocate this
+    // jmp or the ic code will not see it as unresolved.
+/*
+    if (nj->jump_destination() == (address) -1) {
+      x = addr(); // jump to self
+    }
+    nj->set_jump_destination(x);
+  } else if (ni->is_cond_jump()) {
+    // %%%% kludge this, for now, until we get a jump_destination method
+    address old_dest = nativeGeneralJump_at(addr())->jump_destination();
+    address disp = Assembler::locate_operand(addr(), Assembler::call32_operand);
+    *(jint*)disp += (x - old_dest);
+  } else if (ni->is_mov_literal64()) {
+    ((NativeMovConstReg*)ni)->set_data((intptr_t)x);
+  } else {
+    ShouldNotReachHere();
+  }
+*/
+}
+
+
+address* Relocation::pd_address_in_code() {
+	//ShouldNotReachHere();
+	return (address*)addr();
+}
+
+
+address Relocation::pd_get_address_from_code() {
+	NativeMovConstReg* ni = nativeMovConstReg_at(addr());
+	return (address)ni->data();
+}
+
+int Relocation::pd_breakpoint_size() {
+  // minimum breakpoint size, in short words
+  return NativeIllegalInstruction::instruction_size / sizeof(short);
+}
+
+void Relocation::pd_swap_in_breakpoint(address x, short* instrs, int instrlen) {
+  Untested("pd_swap_in_breakpoint");
+  if (instrs != NULL) {
+    assert(instrlen * sizeof(short) == NativeIllegalInstruction::instruction_size, "enough instrlen in reloc. data");
+    for (int i = 0; i < instrlen; i++) {
+      instrs[i] = ((short*)x)[i];
+    }
+  }
+  NativeIllegalInstruction::insert(x);
+}
+
+
+void Relocation::pd_swap_out_breakpoint(address x, short* instrs, int instrlen) {
+  Untested("pd_swap_out_breakpoint");
+  assert(NativeIllegalInstruction::instruction_size == sizeof(short), "right address unit for update");
+  NativeInstruction* ni = nativeInstruction_at(x);
+  *(short*)ni->addr_at(0) = instrs[0];
+}
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+//	Unimplemented();
+}
+
+void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+//	Unimplemented();
+}
+
+void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+	address target =0;
+	NativeMovConstReg* ni = nativeMovConstReg_at(addr());
+	target = new_addr_for((address)ni->data(), src, dest); 
+	ni->set_data((int)target);  
+} 
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,36 @@
+/*
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+  // machine-dependent parts of class relocInfo
+ private:
+  enum {
+		// Since MIPS instructions are whole words,
+		// the two low-order offset bits can always be discarded.
+    offset_unit        =  4,
+
+		// There is no need for format bits; the instructions are
+		// sufficiently self-identifying.
+    format_width       =  0
+  };
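+
+  // Illustration: because every MIPS instruction is word-aligned, a relocation
+  // that is 12 bytes into the code can be recorded as 12 / offset_unit == 3;
+  // the two discarded low-order bits carry no information.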
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/sharedRuntime_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,4303 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_sharedRuntime_mips.cpp.incl"
+
+#define __ masm->
+
+#ifdef COMPILER2
+UncommonTrapBlob*   SharedRuntime::_uncommon_trap_blob;
+#endif // COMPILER2
+
+DeoptimizationBlob* SharedRuntime::_deopt_blob;
+SafepointBlob*      SharedRuntime::_polling_page_safepoint_handler_blob;
+SafepointBlob*      SharedRuntime::_polling_page_return_handler_blob;
+RuntimeStub*        SharedRuntime::_wrong_method_blob;
+RuntimeStub*        SharedRuntime::_ic_miss_blob;
+RuntimeStub*        SharedRuntime::_resolve_opt_virtual_call_blob;
+RuntimeStub*        SharedRuntime::_resolve_virtual_call_blob;
+RuntimeStub*        SharedRuntime::_resolve_static_call_blob;
+
+class RegisterSaver {
+	enum { FPU_regs_live = 32 };
+	// Capture info about frame layout
+	enum layout { 
+		fpr0_off, fpr1_off,
+		fpr2_off, fpr3_off,
+		fpr4_off, fpr5_off,
+		fpr6_off, fpr7_off,
+		fpr8_off, fpr9_off,
+		fpr10_off, fpr11_off,
+		fpr12_off, fpr13_off,
+		fpr14_off, fpr15_off,
+		fpr16_off, fpr17_off,
+		fpr18_off, fpr19_off,
+		fpr20_off, fpr21_off,
+		fpr22_off, fpr23_off,
+		fpr24_off, fpr25_off,
+		fpr26_off, fpr27_off,
+		fpr28_off, fpr29_off,
+		fpr30_off, fpr31_off,
+
+		v0_off, v1_off,
+		a0_off, a1_off,
+		a2_off, a3_off,
+		t0_off, t1_off, t2_off, t3_off, t4_off, t5_off, t6_off, t7_off,
+		s0_off, s1_off, s2_off, s3_off, s4_off, s5_off, s6_off, s7_off,
+		t8_off, t9_off,
+	
+		gp_off, fp_off,
+
+		return_off,
+		reg_save_size
+	};
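+	// With the layout above, reg_save_size comes to 59 words: 32 FPR slots plus
+	// 27 slots for v0/v1, a0-a3, t0-t9, s0-s7, gp/fp and the return address.
+	// save_live_registers() below drops SP by reg_save_size * wordSize bytes and
+	// reports frame_words == reg_save_size + additional_frame_words to its caller.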
+
+  public:
+
+  //static int Oexception_offset() { return o0_offset; };
+  //static int G3_offset() { return g3_offset; };
+  //static int G5_offset() { return g5_offset; };
+  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
+  static void restore_live_registers(MacroAssembler* masm);
+	//FIXME, I have no idea which register to use
+	//static int eax_offset() { return eax_off; }
+	// static int ebx_offset() { return ebx_off; }
+	static int v0_Offset(void) { return v0_off; } 
+	static int t7_Offset(void) { return t7_off; }
+
+	static int v0Offset(void) { return v0_off; }
+	static int v1Offset(void) { return v1_off; }
+  
+	static int fpResultOffset(void) { return fpr0_off; }
+	
+	// During deoptimization only the result registers need to be restored;
+  // all the other values have already been extracted.
+
+  static void restore_result_registers(MacroAssembler* masm);
+};
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+
+	int frame_words = reg_save_size + additional_frame_words;
+	int frame_size_in_bytes =  frame_words * wordSize;
+	*total_frame_words = frame_words;
+
+	// Save the integer and floating-point registers.
+	// We assume the caller already has the return address slot on the stack.
+	// Instead of x86-style pushes, we allocate the whole save area at once
+	// and store each register by hand at its fixed offset.
+
+	__ addiu(SP, SP, - reg_save_size * wordSize);
+
+	__ swc1(F0, SP, fpr0_off * wordSize); __ swc1(F1, SP, fpr1_off * wordSize);
+	__ swc1(F2, SP, fpr2_off * wordSize); __ swc1(F3, SP, fpr3_off * wordSize);
+	__ swc1(F4, SP, fpr4_off * wordSize); __ swc1(F5, SP, fpr5_off * wordSize);
+	__ swc1(F6, SP, fpr6_off * wordSize);	__ swc1(F7, SP, fpr7_off * wordSize);
+	__ swc1(F8, SP, fpr8_off * wordSize);	__ swc1(F9, SP, fpr9_off * wordSize);
+	__ swc1(F10, SP, fpr10_off * wordSize);	__ swc1(F11, SP, fpr11_off * wordSize);
+	__ swc1(F12, SP, fpr12_off * wordSize);	__ swc1(F13, SP, fpr13_off * wordSize);
+	__ swc1(F14, SP, fpr14_off * wordSize);	__ swc1(F15, SP, fpr15_off * wordSize);
+	__ swc1(F16, SP, fpr16_off * wordSize);	__ swc1(F17, SP, fpr17_off * wordSize);
+	__ swc1(F18, SP, fpr18_off * wordSize);	__ swc1(F19, SP, fpr19_off * wordSize);
+	__ swc1(F20, SP, fpr20_off * wordSize);	__ swc1(F21, SP, fpr21_off * wordSize);
+	__ swc1(F22, SP, fpr22_off * wordSize);	__ swc1(F23, SP, fpr23_off * wordSize);
+	__ swc1(F24, SP, fpr24_off * wordSize);	__ swc1(F25, SP, fpr25_off * wordSize);
+	__ swc1(F26, SP, fpr26_off * wordSize);	__ swc1(F27, SP, fpr27_off * wordSize);
+	__ swc1(F28, SP, fpr28_off * wordSize);	__ swc1(F29, SP, fpr29_off * wordSize);
+	__ swc1(F30, SP, fpr30_off * wordSize);	__ swc1(F31, SP, fpr31_off * wordSize);
+	__ sw(V0, SP, v0_off * wordSize);	__ sw(V1, SP, v1_off * wordSize);
+	__ sw(A0, SP, a0_off * wordSize);	__ sw(A1, SP, a1_off * wordSize);
+	__ sw(A2, SP, a2_off * wordSize);	__ sw(A3, SP, a3_off * wordSize);
+	__ sw(T0, SP, t0_off * wordSize);
+	__ sw(T1, SP, t1_off * wordSize);
+	__ sw(T2, SP, t2_off * wordSize);
+	__ sw(T3, SP, t3_off * wordSize);
+	__ sw(T4, SP, t4_off * wordSize);
+	__ sw(T5, SP, t5_off * wordSize);
+	__ sw(T6, SP, t6_off * wordSize);
+	__ sw(T7, SP, t7_off * wordSize);
+	__ sw(S0, SP, s0_off * wordSize);
+	__ sw(S1, SP, s1_off * wordSize);
+	__ sw(S2, SP, s2_off * wordSize);
+	__ sw(S3, SP, s3_off * wordSize);
+	__ sw(S4, SP, s4_off * wordSize);
+	__ sw(S5, SP, s5_off * wordSize);
+	__ sw(S6, SP, s6_off * wordSize);
+	__ sw(S7, SP, s7_off * wordSize);
+	
+	__ sw(T8, SP, t8_off * wordSize);
+	__ sw(T9, SP, t9_off * wordSize);
+	
+	__ sw(GP, SP, gp_off * wordSize);
+	__ sw(FP, SP, fp_off * wordSize);
+	__ sw(RA, SP, return_off * wordSize);
+	__ addi(FP, SP, fp_off * wordSize);
+
+	OopMapSet *oop_maps = new OopMapSet();
+	OopMap* map =  new OopMap( frame_words, 0 );  
+
+
+#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
+	map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t4_off), T4->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t5_off), T5->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t6_off), T6->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t7_off), T7->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg());
+
+	map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg());
+	map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg());
+#undef STACK_OFFSET
+	return map;
+}
+
+
+// Pop the current frame and restore all the registers that we
+// saved.
+void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+	__ lwc1(F0, SP, fpr0_off * wordSize); __ lwc1(F1, SP, fpr1_off * wordSize);
+	__ lwc1(F2, SP, fpr2_off * wordSize); __ lwc1(F3, SP, fpr3_off * wordSize);
+	__ lwc1(F4, SP, fpr4_off * wordSize); __ lwc1(F5, SP, fpr5_off * wordSize);
+	__ lwc1(F6, SP, fpr6_off * wordSize);	__ lwc1(F7, SP, fpr7_off * wordSize);
+	__ lwc1(F8, SP, fpr8_off * wordSize);	__ lwc1(F9, SP, fpr9_off * wordSize);
+	__ lwc1(F10, SP, fpr10_off * wordSize);	__ lwc1(F11, SP, fpr11_off * wordSize);
+	__ lwc1(F12, SP, fpr12_off * wordSize);	__ lwc1(F13, SP, fpr13_off * wordSize);
+	__ lwc1(F14, SP, fpr14_off * wordSize);	__ lwc1(F15, SP, fpr15_off * wordSize);
+	__ lwc1(F16, SP, fpr16_off * wordSize);	__ lwc1(F17, SP, fpr17_off * wordSize);
+	__ lwc1(F18, SP, fpr18_off * wordSize);	__ lwc1(F19, SP, fpr19_off * wordSize);
+	__ lwc1(F20, SP, fpr20_off * wordSize);	__ lwc1(F21, SP, fpr21_off * wordSize);
+	__ lwc1(F22, SP, fpr22_off * wordSize);	__ lwc1(F23, SP, fpr23_off * wordSize);
+	__ lwc1(F24, SP, fpr24_off * wordSize);	__ lwc1(F25, SP, fpr25_off * wordSize);
+	__ lwc1(F26, SP, fpr26_off * wordSize);	__ lwc1(F27, SP, fpr27_off * wordSize);
+	__ lwc1(F28, SP, fpr28_off * wordSize);	__ lwc1(F29, SP, fpr29_off * wordSize);
+	__ lwc1(F30, SP, fpr30_off * wordSize);	__ lwc1(F31, SP, fpr31_off * wordSize);
+
+	__ lw(V0, SP, v0_off * wordSize);	__ lw(V1, SP, v1_off * wordSize);
+	__ lw(A0, SP, a0_off * wordSize);	__ lw(A1, SP, a1_off * wordSize);
+	__ lw(A2, SP, a2_off * wordSize);	__ lw(A3, SP, a3_off * wordSize);
+	__ lw(T0, SP, t0_off * wordSize);
+	__ lw(T1, SP, t1_off * wordSize);
+	__ lw(T2, SP, t2_off * wordSize);
+	__ lw(T3, SP, t3_off * wordSize);
+	__ lw(T4, SP, t4_off * wordSize);
+	__ lw(T5, SP, t5_off * wordSize);
+	__ lw(T6, SP, t6_off * wordSize);
+	__ lw(T7, SP, t7_off * wordSize);
+	__ lw(S0, SP, s0_off * wordSize);
+	__ lw(S1, SP, s1_off * wordSize);
+	__ lw(S2, SP, s2_off * wordSize);
+	__ lw(S3, SP, s3_off * wordSize);
+	__ lw(S4, SP, s4_off * wordSize);
+	__ lw(S5, SP, s5_off * wordSize);
+	__ lw(S6, SP, s6_off * wordSize);
+	__ lw(S7, SP, s7_off * wordSize);
+	
+	__ lw(T8, SP, t8_off * wordSize);
+	__ lw(T9, SP, t9_off * wordSize);
+	
+	__ lw(GP, SP, gp_off * wordSize);
+	__ lw(FP, SP, fp_off * wordSize);
+	__ lw(RA, SP, return_off * wordSize);
+		
+	__ addiu(SP, SP, reg_save_size * wordSize);
+}
+
+// Pop the current frame and restore the registers that might be holding
+// a result.
+// FIXME: what if the result is a float?
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
+	// Just restore result register. Only used by deoptimization. By
+	// now any callee-save register that needs to be restored to a c2
+	// caller of the deoptee has been extracted into the vframeArray
+	// and will be stuffed into the c2i adapter we create for later
+	// restoration so only result registers need to be restored here.
+	//
+	__ lw(V0, SP, v0_off * wordSize);
+	__ lw(V1, SP, v1_off * wordSize);
+	__ addiu(SP, SP, return_off * wordSize); 
+}
+
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no ABI restrictions. Since we must observe ABI restrictions
+// (such as the placement of the register window on SPARC) the slots must be biased by
+// the following value.
+/*
+static int reg2offset(VMReg r) {
+  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+}
+*/
+static int reg2offset_in(VMReg r) { 
+	// Account for the saved frame pointer and return address
+	// This should really be in_preserve_stack_slots
+	return (r->reg2stack() + 2) * VMRegImpl::stack_slot_size;
+}
+
+static int reg2offset_out(VMReg r) { 
+	return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+}
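+
+// For example, with VMRegImpl::stack_slot_size == 4, an argument in incoming
+// stack slot 3 maps to reg2offset_in == (3 + 2) * 4 == 20 bytes, the two extra
+// slots covering the saved frame pointer and return address noted above.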
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
+// quantities.  Values less than VMRegImpl::stack0 are registers, those above
+// refer to 4-byte stack slots.  All stack slots are based off of the window
+// top.  VMRegImpl::stack0 refers to the first slot past the 16-word window,
+// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher.  Register
+// values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
+// integer registers.  Values 64-95 are the (32-bit only) float registers.
+// Each 32-bit quantity is given its own number, so the integer registers
+// (in either 32- or 64-bit builds) use 2 numbers.  For example, there is
+// an O0-low and an O0-high.  Essentially, all int register numbers are doubled.
+
+// (The register names below are SPARC's; this commentary was carried over from
+// the SPARC port.)  Register results are passed in O0-O5, for outgoing call
+// arguments.  To
+// convert to incoming arguments, convert all O's to I's.  The regs array
+// refer to the low and hi 32-bit words of 64-bit registers or stack slots.
+// If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
+// 32-bit value was passed).  If both are VMRegImpl::Bad(), it means no value was
+// passed (used as a placeholder for the other half of longs and doubles in
+// the 64-bit build).  regs[].second() is either VMRegImpl::Bad() or regs[].second() is
+// regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
+// Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
+// == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
+// same VMRegPair.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words, which are
+// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
+// units regardless of build.
+
+
+// ---------------------------------------------------------------------------
+// The compiled Java calling convention.  The Java convention always passes
+// 64-bit values in adjacent aligned locations (either registers or stack),
+// floats in float registers and doubles in aligned float pairs.  Values are
+// packed in the registers.  There is no backing varargs store for values in
+// registers.  In the 32-bit build, longs are passed in G1 and G4 (cannot be
+// passed in I's, because longs in I's get their heads chopped off at
+// interrupt).
+/*
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+  assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
+
+  // Convention is to pack the first 6 int/oop args into the first 6 registers
+  // (I0-I5), extras spill to the stack.  Then pack the first 8 float args
+  // into F0-F7, extras spill to the stack.  Then pad all register sets to
+  // align.  Then put longs and doubles into the same registers as they fit,
+  // else spill to the stack.
+  const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
+  const int flt_reg_max = 8;
+  //
+  // Where 32-bit 1-reg longs start being passed
+  // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
+  // So make it look like we've filled all the G regs that c2 wants to use.
+  Register g_reg = TieredCompilation ? noreg : G1;
+
+  // Count int/oop and float args.  See how many stack slots we'll need and
+  // where the longs & doubles will go.
+  int int_reg_cnt   = 0;
+  int flt_reg_cnt   = 0;
+  // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
+  // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
+  int stk_reg_pairs = 0;
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_LONG:                // LP64, longs compete with int args
+      assert(sig_bt[i+1] == T_VOID, "");
+#ifdef _LP64
+      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+#endif
+      break;
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+#ifndef _LP64
+      else                            stk_reg_pairs++;
+#endif
+      break;
+    case T_INT:
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      else                            stk_reg_pairs++;
+      break;
+    case T_FLOAT:
+      if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
+      else                            stk_reg_pairs++;
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i+1] == T_VOID, "");
+      break;
+    case T_VOID:
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+  // This is where the longs/doubles start on the stack.
+  stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
+
+  int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
+  int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
+
+  // int stk_reg = frame::register_save_words*(wordSize>>2);
+  // int stk_reg = SharedRuntime::out_preserve_stack_slots();
+  int stk_reg = 0;
+  int int_reg = 0;
+  int flt_reg = 0;
+
+  // Now do the signature layout
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_INT:
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+#ifndef _LP64
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+#endif // _LP64
+      if (int_reg < int_reg_max) {
+        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+        regs[i].set1(r->as_VMReg());
+      } else {
+        regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
+      }
+      break;
+
+#ifdef _LP64
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
+      if (int_reg < int_reg_max) {
+        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+        regs[i].set2(r->as_VMReg());
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+        stk_reg_pairs += 2;
+      }
+      break;
+#endif // _LP64
+
+    case T_LONG:
+      assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
+#ifdef _LP64
+        if (int_reg < int_reg_max) {
+          Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
+          regs[i].set2(r->as_VMReg());
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+          stk_reg_pairs += 2;
+        }
+#else
+#ifdef COMPILER2
+        // For 32-bit build, can't pass longs in O-regs because they become
+        // I-regs and get trashed.  Use G-regs instead.  G1 and G4 are almost
+        // spare and available.  This convention isn't used by the Sparc ABI or
+        // anywhere else. If we're tiered then we don't use G-regs because c1
+        // can't deal with them as a "pair". (Tiered makes this code think g's are filled)
+        // G0: zero
+        // G1: 1st Long arg
+        // G2: global allocated to TLS
+        // G3: used in inline cache check
+        // G4: 2nd Long arg
+        // G5: used in inline cache check
+        // G6: used by OS
+        // G7: used by OS
+
+        if (g_reg == G1) {
+          regs[i].set2(G1->as_VMReg()); // This long arg in G1
+          g_reg = G4;                  // Where the next arg goes
+        } else if (g_reg == G4) {
+          regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
+          g_reg = noreg;               // No more longs in registers
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+          stk_reg_pairs += 2;
+        }
+#else // COMPILER2
+        if (int_reg_pairs + 1 < int_reg_max) {
+          if (is_outgoing) {
+            regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
+          } else {
+            regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
+          }
+          int_reg_pairs += 2;
+        } else {
+          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+          stk_reg_pairs += 2;
+        }
+#endif // COMPILER2
+#endif // _LP64
+      break;
+
+    case T_FLOAT:
+      if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
+      else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i+1] == T_VOID, "expecting half");
+      if (flt_reg_pairs + 1 < flt_reg_max) {
+        regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
+        flt_reg_pairs += 2;
+      } else {
+        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
+        stk_reg_pairs += 2;
+      }
+      break;
+    case T_VOID: regs[i].set_bad();  break; // Halves of longs & doubles
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+  // return the amount of stack space these arguments will need.
+  return stk_reg_pairs;
+
+}
+*/
+
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+	uint    stack = 0;          // Starting stack position for args on stack
+
+
+	// Pass the first oop/int args in registers: T0, then A0..A3.
+	uint reg_arg0 = 9999;
+	uint reg_arg1 = 9999;
+	uint reg_arg2 = 9999;
+	uint reg_arg3 = 9999;
+	uint reg_arg4 = 9999;
+//	uint reg_arg1 = 9999;
+
+ 
+// Pass doubles & longs aligned on the stack (floats also go on the stack).  First count stack slots for doubles and longs.
+	int i;
+	for( i = 0; i < total_args_passed; i++) {
+		if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) {
+			stack += 2;
+		}
+	}
+	int dstack = 0;             // Separate counter for placing doubles
+ for( i = 0; i < total_args_passed; i++) {
+    // From the type and the argument number (count) compute the location
+    switch( sig_bt[i] ) {
+    case T_SHORT:
+    case T_CHAR:
+    case T_BYTE:
+    case T_BOOLEAN:
+    case T_INT:
+    case T_ARRAY:
+    case T_OBJECT:
+    case T_ADDRESS:
+	    if( reg_arg0 == 9999 )  {
+		    reg_arg0 = i;
+		    regs[i].set1(T0->as_VMReg());
+	    }
+	    else if( reg_arg1 == 9999 )  {
+		    reg_arg1 = i;
+		    regs[i].set1(A0->as_VMReg());
+	    }
+	   else if( reg_arg2 == 9999 )  {
+		    reg_arg2 = i;
+		    regs[i].set1(A1->as_VMReg());
+	    }else if( reg_arg3 == 9999 )  {
+		    reg_arg3 = i;
+		    regs[i].set1(A2->as_VMReg());
+	    }else if( reg_arg4 == 9999 )  {
+		    reg_arg4 = i;
+		    regs[i].set1(A3->as_VMReg());
+	    }
+	    else 
+	    {
+		    regs[i].set1(VMRegImpl::stack2reg(stack++));
+	    }
+	    break;
+    case T_FLOAT:
+	    regs[i].set1(VMRegImpl::stack2reg(stack++));
+	   // regs[i].set2(VMRegImpl::stack2reg(dstack));
+	   // dstack += 2;
+	    break;
+    case T_LONG:      
+	    assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
+	    regs[i].set2(VMRegImpl::stack2reg(dstack));
+	    dstack += 2;
+	    break;
+    case T_DOUBLE:
+	    assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
+	    regs[i].set2(VMRegImpl::stack2reg(dstack));
+	    dstack += 2;
+	    break;
+    case T_VOID: regs[i].set_bad(); break;
+    default:
+		 ShouldNotReachHere();
+		 break;
+    }
+ }
+
+  // The return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2.
+  return round_to(stack, 2);
+}
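+
+// A worked illustration of the convention above: for the Java signature
+// (int, long, Object, double, float) the loops assign
+//   int    -> T0            long   -> stack slots 0-1 (dstack)
+//   Object -> A0            double -> stack slots 2-3 (dstack)
+//   float  -> stack slot 4  (the int/float counter starts at 4, after the
+//                            long/double slots counted up front)
+// and the function returns round_to(5, 2) == 6 outgoing VMReg stack slots.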
+
+// Helper class mostly to avoid passing masm everywhere, and handle store
+// displacement overflow logic for LP64
+class AdapterGenerator {
+  MacroAssembler *masm;
+#ifdef _LP64
+  Register Rdisp;
+  void set_Rdisp(Register r)  { Rdisp = r; }
+#endif // _LP64
+
+  void patch_callers_callsite();
+  void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch);
+
+  // base+st_off points to top of argument
+  int arg_offset(const int st_off) { return st_off + Interpreter::value_offset_in_bytes(); }
+  int next_arg_offset(const int st_off) {
+    return st_off - Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
+  }
+
+#ifdef _LP64
+  // On _LP64 argument slot values are loaded first into a register
+  // because they might not fit into displacement.
+  Register arg_slot(const int st_off);
+  Register next_arg_slot(const int st_off);
+#else
+  int arg_slot(const int st_off)      { return arg_offset(st_off); }
+  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
+#endif // _LP64
+
+  // Stores long into offset pointed to by base
+  void store_c2i_long(Register r, Register base,
+                      const int st_off, bool is_stack);
+  void store_c2i_object(Register r, Register base,
+                        const int st_off);
+  void store_c2i_int(Register r, Register base,
+                     const int st_off);
+  void store_c2i_double(VMReg r_2,
+                        VMReg r_1, Register base, const int st_off);
+  void store_c2i_float(FloatRegister f, Register base,
+                       const int st_off);
+
+ public:
+	void tag_stack(const BasicType sig, int st_off);
+  void gen_c2i_adapter(int total_args_passed,
+                              // VMReg max_arg,
+                              int comp_args_on_stack, // VMRegStackSlots
+                              const BasicType *sig_bt,
+                              const VMRegPair *regs,
+                              Label& skip_fixup);
+  void gen_i2c_adapter(int total_args_passed,
+                              // VMReg max_arg,
+                              int comp_args_on_stack, // VMRegStackSlots
+                              const BasicType *sig_bt,
+                              const VMRegPair *regs);
+
+  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
+};
+
+
+// Patch the callers callsite with entry to compiled code if it exists.
+void AdapterGenerator::patch_callers_callsite() {
+	Label L;
+	//FIXME , what is stored in eax? 
+	//__ verify_oop(ebx);
+	__ verify_oop(T7);
+	// __ cmpl(Address(ebx, in_bytes(methodOopDesc::code_offset())), NULL_WORD);
+	__ lw(AT, T7, in_bytes(methodOopDesc::code_offset())); 
+	//__ jcc(Assembler::equal, L);
+	__ beq(AT,ZERO,L); 
+	__ delayed()->nop(); 
+	// Schedule the branch target address early.
+	// Call into the VM to patch the caller, then jump to compiled callee
+	// eax isn't live so capture return address while we easily can
+	//  __ movl(eax, Address(esp, 0));
+//	__ lw(T5,SP,0);  
+	__ move(V0, RA);
+       
+	__ pushad();
+      	//jerome_for_debug
+	// __ pushad();
+	// __ pushfd();
+#ifdef COMPILER2
+	// C2 may leave the stack dirty if not in SSE2+ mode
+	__ empty_FPU_stack();
+#endif /* COMPILER2 */
+
+	// VM needs caller's callsite
+	//  __ pushl(eax);
+
+	// VM needs target method
+	// __ pushl(ebx);
+	//  __ push(T7);
+	// __ verify_oop(ebx);
+      
+	__ move(A0, T7); 
+	__ move(A1, V0); 
+	__ addi(SP, SP, -8);
+//we should preserve the return address
+	__ verify_oop(T7);
+      	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), 
+			relocInfo::runtime_call_type);
+	//__ addl(esp, 2*wordSize);
+
+	__ delayed()->nop(); 
+        __ addi(SP, SP, 8);
+	//  __ popfd();
+	__ popad();
+	__ bind(L);
+}
+
+void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off,
+                 Register scratch) {
+	Unimplemented();
+}
+
+#ifdef _LP64
+Register AdapterGenerator::arg_slot(const int st_off) {
+	Unimplemented();
+}
+
+Register AdapterGenerator::next_arg_slot(const int st_off){
+	Unimplemented();
+}
+#endif // _LP64
+
+// Stores long into offset pointed to by base
+void AdapterGenerator::store_c2i_long(Register r, Register base,
+                                      const int st_off, bool is_stack) {
+	Unimplemented();
+}
+
+void AdapterGenerator::store_c2i_object(Register r, Register base,
+                      const int st_off) {
+	Unimplemented();
+}
+
+void AdapterGenerator::store_c2i_int(Register r, Register base,
+                   const int st_off) {
+	Unimplemented();
+}
+
+// Stores into offset pointed to by base
+void AdapterGenerator::store_c2i_double(VMReg r_2,
+                      VMReg r_1, Register base, const int st_off) {
+	Unimplemented();
+}
+
+void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
+                                       const int st_off) {
+	Unimplemented();
+}
+
+void  AdapterGenerator::tag_stack(const BasicType sig, int st_off) {
+	if (TaggedStackInterpreter) {
+		int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
+		if (sig == T_OBJECT || sig == T_ARRAY) {
+			//   __ movl(Address(esp, tag_offset), frame::TagReference);
+			//  __ addi(AT,ZERO, frame::TagReference); 
+
+			__ move(AT, frame::TagReference);
+			__ sw (AT, SP, tag_offset); 
+		} else if (sig == T_LONG || sig == T_DOUBLE) {
+			int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
+			// __ movl(Address(esp, next_tag_offset), frame::TagValue);
+			// __ addi(AT,ZERO, frame::TagValue); 
+			__ move(AT, frame::TagValue); 
+			__ sw (AT, SP, next_tag_offset); 
+			//__ movl(Address(esp, tag_offset), frame::TagValue);
+			//   __ addi(AT,ZERO, frame::TagValue); 
+			__ move(AT, frame::TagValue); 
+			__ sw (AT, SP, tag_offset); 
+
+		} else {
+			//  __ movl(Address(esp, tag_offset), frame::TagValue);
+			//__ addi(AT,ZERO, frame::TagValue); 
+			__ move(AT, frame::TagValue); 
+			__ sw (AT, SP, tag_offset); 
+
+		}
+	}
+}
+
+void AdapterGenerator::gen_c2i_adapter(
+                            int total_args_passed,
+                            // VMReg max_arg,
+                            int comp_args_on_stack, // VMRegStackSlots
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs,
+                            Label& skip_fixup) {
+
+  // Before we get into the guts of the C2I adapter, see if we should be here
+  // at all.  We've come from compiled code and are attempting to jump to the
+  // interpreter, which means the caller made a static call to get here
+  // (vcalls always get a compiled target if there is one).  Check for a
+  // compiled target.  If there is one, we need to patch the caller's call.
+  // However we will run interpreted if we come thru here. The next pass
+  // thru the call site will run compiled. If we ran compiled here then
+  // we can (theoretically) do endless i2c->c2i->i2c transitions during
+  // deopt/uncommon trap cycles. If we always go interpreted here then
+  // we can have at most one and don't need to play any tricks to keep
+  // from endlessly growing the stack.
+  //
+  // Actually if we detected that we had an i2c->c2i transition here we
+  // ought to be able to reset the world back to the state of the interpreted
+  // call and not bother building another interpreter arg area. We don't
+  // do that at this point.
+
+	patch_callers_callsite();
+
+	__ bind(skip_fixup);
+
+#ifdef COMPILER2
+	__ empty_FPU_stack();
+#endif /* COMPILER2 */
+	//this is for native ?
+	// Since all args are passed on the stack,
+	// total_args_passed * Interpreter::stackElementSize() is the space we need.
+	int extraspace = total_args_passed * Interpreter::stackElementSize();
+
+	// Get return address
+	// __ popl(eax);
+	//__ pop(T4);
+        __ move(V0, RA);		
+	// set senderSP value
+	// __ movl(esi, esp);
+//refer to interpreter_mips.cpp:generate_asm_entry
+	__ move(T5, SP); 
+	//__ subl(esp, extraspace);
+	__ addi(SP,SP, -extraspace);
+
+	// Now write the args into the outgoing interpreter space
+	for (int i = 0; i < total_args_passed; i++) {
+		if (sig_bt[i] == T_VOID) {
+			assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), 
+					"missing half");
+			continue;
+		}
+
+		// st_off points to lowest address on stack.
+		int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize();
+		// Say 4 args:
+		// i   st_off
+		// 0   12 T_LONG
+		// 1    8 T_VOID
+		// 2    4 T_OBJECT
+		// 3    0 T_BOOL
+		VMReg r_1 = regs[i].first();
+		VMReg r_2 = regs[i].second();
+		if (!r_1->is_valid()) {
+			assert(!r_2->is_valid(), "");
+			continue;
+		}
+
+		if (r_1->is_stack()) { 
+			// memory to memory: copy via AT (the x86 original used the fpu stack top)
+			int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
+
+			if (!r_2->is_valid()) {
+				//  __ movl(edi, Address(esp, ld_off));
+				__ lw(AT, SP, ld_off); 
+				//__ movl(Address(esp, st_off), edi);
+				__ sw(AT, SP, st_off); 
+				tag_stack(sig_bt[i], st_off);
+			} else {
+
+				// ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
+				// st_off == MSW, st_off-wordSize == LSW
+
+				int next_off = st_off - Interpreter::stackElementSize();
+				//__ movl(edi, Address(esp, ld_off));
+				__ lw(AT, SP, ld_off); 
+				//__ movl(Address(esp, next_off), edi);
+				__ sw(AT, SP, next_off);
+				// __ movl(edi, Address(esp, ld_off + wordSize));
+				__ lw(AT, SP, ld_off + wordSize);
+				//__ movl(Address(esp, st_off), edi);
+				__ sw(AT, SP, st_off);
+				tag_stack(sig_bt[i], next_off);
+			}
+		} else if (r_1->is_Register()) {
+			Register r = r_1->as_Register();
+			if (!r_2->is_valid()) {
+				// __ movl(Address(esp, st_off), r);
+				__ sw(r,SP, st_off);
+				tag_stack(sig_bt[i], st_off);
+			} else {
+				//FIXME, x86 will not enter here 
+				// long/double in gpr
+				ShouldNotReachHere();
+			//	int next_off = st_off - Interpreter::stackElementSize();
+			//	__ sw(r_2->as_Register(),SP, st_off);
+			//	__ sw(r,SP, next_off);
+			//	tag_stack(masm, sig_bt[i], next_off);
+			}
+		} 
+	}
+        
+	// Schedule the branch target address early.
+	//  __ movl(ecx, Address(ebx, in_bytes(methodOopDesc::interpreter_entry_offset())));
+	__ lw(AT,T7,in_bytes(methodOopDesc::interpreter_entry_offset()) ); 
+	// And repush original return address
+	//__ push(T4);
+	__ move(RA, V0);	
+	//__ jmp(T5);
+//	__ move(T4, (int)&jerome7);
+//		__ sw(AT, T4, 0)
+	__ jr (AT); 
+	__ delayed()->nop();
+}
+
+void AdapterGenerator::gen_i2c_adapter(
+                            int total_args_passed,
+                            // VMReg max_arg,
+                            int comp_args_on_stack, // VMRegStackSlots
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs) {
+
+  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
+  // layout.  Lesp was saved by the calling I-frame and will be restored on
+  // return.  Meanwhile, outgoing arg space is all owned by the callee
+  // C-frame, so we can mangle it at will.  After adjusting the frame size,
+  // hoist register arguments and repack other args according to the compiled
+  // code convention.  Finally, end in a jump to the compiled code.  The entry
+  // point address is the start of the buffer.
+
+  // We will only enter here from an interpreted frame and never from after
+  // passing thru a c2i. Azul allowed this but we do not. If we lose the
+  // race and use a c2i we will remain interpreted for the race loser(s).
+  // This removes all sorts of headaches on the x86 side and also eliminates
+  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
+
+  // (The input/output register list below is the SPARC original's; the MIPS code
+  // that follows uses S5, T7, V0/AT and friends instead.)  As you can see from
+  // the list of inputs & outputs there are not a lot of temp registers to work
+  // with: mostly G1, G3 & G4.
+
+  // Inputs:
+  // G2_thread      - TLS
+  // G5_method      - Method oop
+  // O0             - Flag telling us to restore SP from O5
+  // O4_args        - Pointer to interpreter's args
+  // O5             - Caller's saved SP, to be restored if needed
+  // O6             - Current SP!
+  // O7             - Valid return address
+  // L0-L7, I0-I7    - Caller's temps (no frame pushed yet)
+
+  // Outputs:
+  // G2_thread      - TLS
+  // G1, G4         - Outgoing long args in 32-bit build
+  // O0-O5          - Outgoing args in compiled layout
+  // O6             - Adjusted or restored SP
+  // O7             - Valid return address
+  // L0-L7, I0-I7    - Caller's temps (no frame pushed yet)
+  // F0-F7          - more outgoing args
+
+
+	__ move(S5, SP);
+
+	// Cut-out for having no stack args.  Since up to 5 int/oop args are passed
+	// in registers (T0, A0..A3), we will occasionally have no stack args.
+	int comp_words_on_stack = 0;
+	if (comp_args_on_stack) {
+		// Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
+		// registers are below.  By subtracting stack0, we either get a negative
+		// number (all values in registers) or the maximum stack slot accessed.
+		// int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
+		// Convert 4-byte stack slots to words.
+		// does MIPS need this rounding? FIXME
+		comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
+		// Round up to minimum stack alignment, in wordSize
+		comp_words_on_stack = round_to(comp_words_on_stack, 2);
+		// __ subl(esp, comp_words_on_stack * wordSize);
+		__ addi(SP, SP, -comp_words_on_stack * wordSize);
+	}
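+	// For example, with comp_args_on_stack == 5 and the 32-bit wordSize of 4,
+	// the expression above gives round_to(20, 4) >> 2 == 5 words, rounded up to
+	// an even count of 6, so 6 * wordSize bytes are claimed below SP here.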
+
+	// Align the outgoing SP
+	//__ andl(esp, -(StackAlignmentInBytes));
+	__ move(AT, -(StackAlignmentInBytes));
+ 	__ andr(SP, SP, AT);	
+	// push the return address on the stack (note that pushing, rather
+	// than storing it, yields the correct frame alignment for the callee)
+	// __ pushl(eax);
+//	__ push(T5);
+//	__ move(RA, T5);
+	// Put saved SP in another register
+	// const Register saved_sp = eax;
+	const Register saved_sp = V0;
+	//__ movl(saved_sp, edi);
+	__ move(saved_sp, S5);
+
+
+	// Will jump to the compiled code just as if compiled code was doing it.
+	// Pre-load the register-jump target early, to schedule it better.
+	// __ movl(edi, Address(ebx, in_bytes(methodOopDesc::from_compiled_offset())));
+	__ lw(S5, T7, in_bytes(methodOopDesc::from_compiled_offset()));
+	// Now generate the shuffle code.  Pick up all register args and move the
+	// rest through the floating point stack top.
+	//printf("args num =%d \n", total_args_passed);	
+	for (int i = 0; i < total_args_passed; i++) {
+		if (sig_bt[i] == T_VOID) {
+			// Longs and doubles are passed in native word order, but misaligned
+			// in the 32-bit build.
+			assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
+			continue;
+		}
+
+		// Pick up 0, 1 or 2 words from SP+offset.  
+
+		assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+				"scrambled load targets?");
+		// Load in argument order going down.
+		//NOTICE HERE!!!! I add 1 here
+		int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize() 
+			+ Interpreter::value_offset_in_bytes();
+		// Point to interpreter value (vs. tag)
+		int next_off = ld_off - Interpreter::stackElementSize();
+		//
+		//  
+		//
+		VMReg r_1 = regs[i].first();
+		VMReg r_2 = regs[i].second();
+		if (!r_1->is_valid()) {
+			assert(!r_2->is_valid(), "");
+			continue;
+		}
+		if (r_1->is_stack()) { 
+			// Convert stack slot to an SP offset (+ wordSize to 
+			// account for return address )
+		//NOTICE HERE!!!! I sub a wordSize here	
+			int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; 
+				//+ wordSize;
+
+			// We can use AT as a temp here (esi in the x86 reference code)
+			// because compiled code doesn't need it as an input, and if we
+			// end up going through a c2i because of a miss a reasonable
+			// value will be generated.
+			if (!r_2->is_valid()) {
+				// __ fld_s(Address(saved_sp, ld_off));
+				// __ fstp_s(Address(esp, st_off));
+				// __ movl(esi, Address(saved_sp, ld_off));
+				__ lw(AT, saved_sp, ld_off);
+				// __ movl(Address(esp, st_off), esi);
+				__ sw(AT, SP, st_off); 
+			} else {
+				// Interpreter local[n] == MSW, local[n+1] == LSW however locals
+				// are accessed as negative so LSW is at LOW address
+
+				// ld_off is MSW so get LSW
+				// st_off is LSW (i.e. reg.first())
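+				// Sketch of the copy for one long/double argument:
+				//   interpreter (saved_sp)        compiled frame (SP)
+				//   [next_off] = LSW      -->     [st_off]            = LSW
+				//   [ld_off]   = MSW      -->     [st_off + wordSize] = MSW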
+				// __ fld_d(Address(saved_sp, next_off));
+				// __ fstp_d(Address(esp, st_off));
+				//  __ movl(esi, Address(saved_sp, next_off));
+				__ lw(AT, saved_sp, next_off); 
+				//__ movl(Address(esp, st_off), esi);
+				__ sw(AT, SP, st_off); 
+				//__ movl(esi, Address(saved_sp, ld_off));
+				__ lw(AT, saved_sp, ld_off); 
+				//__ movl(Address(esp, st_off + wordSize), esi);
+				__ sw(AT, SP, st_off + wordSize); 
+			}
+		} else if (r_1->is_Register()) {  // Register argument
+			Register r = r_1->as_Register();
+			// assert(r != eax, "must be different");
+			if (r_2->is_valid()) {
+				//  assert(r_2->as_Register() != eax, "need another temporary register");
+				// Remember r_1 is low address (and LSB on x86)
+				// So r_2 gets loaded from high address regardless of the platform
+				//  __ movl(r_2->as_Register(), Address(saved_sp, ld_off));
+				__ lw(r_2->as_Register(), saved_sp, ld_off);
+				//  __ movl(r, Address(saved_sp, next_off));
+				//    __ movl(r, Address(saved_sp, next_off));
+				__ lw(r, saved_sp, next_off);
+			} else {
+				//  __ movl(r, Address(saved_sp, ld_off));
+				__ lw(r, saved_sp, ld_off);
+			}
+		}   
+	}
+
+  // 6243940 We might end up in handle_wrong_method if
+  // the callee is deoptimized as we race thru here. If that
+  // happens we don't want to take a safepoint because the
+  // caller frame will look interpreted and arguments are now
+  // "compiled" so it is much better to make this transition
+  // invisible to the stack walking code. Unfortunately if
+  // we try and find the callee by normal means a safepoint
+  // is possible. So we stash the desired callee in the thread
+  // and the vm will find there should this case occur.
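+  // In this port the methodOop is in T7 at this point (see the store
+  // below); JavaThread::callee_target is where the VM will look for it
+  // if we do land in handle_wrong_method.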
+//jerome_for_debug
+ //      	__ move(AT, total_args_passed); 
+	//__ get_thread(eax);
+		__ get_thread(T8);
+		// __ movl(Address(eax, JavaThread::callee_target_offset()), ebx);
+		__ sw(T7,T8, in_bytes(JavaThread::callee_target_offset()));
+
+		// move methodOop to V0 (eax on x86) in case we end up in a c2i adapter.
+		// the c2i adapters expect methodOop in eax (c2) because c2's
+		// resolve stubs return the result (the method) in eax.
+		// I'd love to fix this. 
+		//__ movl(eax, ebx);
+		//__ move(TREG, T7);
+		__ move(V0, T7);	
+		//__ jmp(edi);
+//		__ move(RA, (int)__ pc());	
+//		__ addi(RA, RA,8);	
+//	__ lw(AT, ZERO, 16);;
+ //       __ nop();
+//	__ nop();
+//jerome_for_debug
+/*	
+		__ move(AT, (int)&jerome10);
+		__ sw(saved_sp, AT, 0);	
+       		__ move(AT, (int)&jerome9);
+		__ sw(T4, AT, 0);	
+       		__ move(AT, (int)&jerome8);
+		__ sw(SP, AT, 0);	
+       	
+	
+		__ move(AT, (int)&jerome5);
+		__ sw(T0, AT, 0);
+		__ move(AT, (int)&jerome4);
+		__ sw(A0, AT, 0);
+		__ move(AT, (int)&jerome3);
+		__ sw(A1, AT, 0);
+        	__ move(AT, (int)&jerome2);
+		__ sw(A2, AT, 0);
+         //	__ move(AT, (int)&jerome1);
+	//	__ sw(A3, AT, 0);
+*/
+
+//		__ sw(RA, AT, 0);	
+		__ jr(S5);
+	 //       __ jalr(T4);
+		__ delayed()->nop();
+       //  	__ move(AT, (int)&jerome10);
+	//	__ lw(RA, AT, 0);	
+         //       __ jr(RA);
+	//	__ delayed()->nop();
+		//	__ lw(AT, ZERO, 16);;
+ //       __ nop();
+//	__ nop();
+}
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            // VMReg max_arg,
+                                                            int comp_args_on_stack, // VMRegStackSlots
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs) {
+  address i2c_entry = __ pc();
+
+  AdapterGenerator agen(masm);
+
+  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+
+  // -------------------------------------------------------------------------
+  // Generate a C2I adapter.  On entry we know T7 (G5 in the SPARC original) holds the methodOop.  The
+  // args start out packed in the compiled layout.  They need to be unpacked
+  // into the interpreter layout.  This will almost always require some stack
+  // space.  We grow the current (compiled) stack, then repack the args.  We
+  // finally end in a jump to the generic interpreter entry point.  On exit
+  // from the interpreter, the interpreter will restore our SP (lest the
+  // compiled code, which relies solely on SP and not FP, get sick).
+
+  address c2i_unverified_entry = __ pc();
+  Label skip_fixup;
+  {
+		Register holder = T1;
+		Register receiver = T0;
+		Register temp = T8;
+		address ic_miss = SharedRuntime::get_ic_miss_stub();
+
+		Label missed;
+/*
+		__ move(AT, (int)&jerome1 );
+		__ sw(SP, AT, 0); 	
+	__ move(AT, (int)&jerome2 );
+	__ sw(FP, AT, 0); 	
+	__ move(AT, (int)&jerome3 );
+	__ sw(RA, AT, 0); 	
+//	__ sw(A0, AT, 0); 	
+
+	__ move(AT, (int)&jerome4 );
+	__ sw(T0, AT, 0); 	
+	__ move(AT, (int)&jerome5 );
+	__ sw(T1, AT, 0);	
+	__ move(AT, (int)&jerome6 );
+	__ sw(V0, AT, 0); 	
+	__ move(AT, (int)&jerome7 );
+	__ sw(V1, AT, 0); 	
+	__ move(AT, (int)&jerome8 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome9 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome10 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome4 );
+	__ lw(T5, AT, 0);
+*/
+//	__ pushad();
+	
+	//__ enter();
+//	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				//relocInfo::runtime_call_type);
+//	__ delayed()->nop();
+	
+//	__ popad();
+
+
+		
+		__ verify_oop(holder);
+		// __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
+		__ lw(temp, receiver, oopDesc::klass_offset_in_bytes());
+		__ verify_oop(temp);
+
+		//  __ cmpl(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
+		__ lw (AT, holder, compiledICHolderOopDesc::holder_klass_offset()); 
+		//__ movl(ebx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
+		__ lw(T7, holder, compiledICHolderOopDesc::holder_method_offset());
+		//__ jcc(Assembler::notEqual, missed);
+		__ bne(AT, temp, missed); 
+		__ delayed()->nop(); 
+		// Method might have been compiled since the call site was patched to
+		// interpreted; if that is the case, treat it as a miss so we can get
+		// the call site corrected.
+		//__ cmpl(Address(ebx, in_bytes(methodOopDesc::code_offset())), NULL_WORD);
+		//__ jcc(Assembler::equal, skip_fixup);
+		__ lw(AT, T7, in_bytes(methodOopDesc::code_offset()));
+		__ beq(AT,ZERO, skip_fixup); 
+		__ delayed()->nop(); 
+		__ bind(missed);
+	     //   __ move(AT, (int)&jerome7);	
+	//	__ sw(RA, AT, 0);	
+
+		__ jmp(ic_miss, relocInfo::runtime_call_type);
+		__ delayed()->nop(); 
+  }
+
+  address c2i_entry = __ pc();
+
+  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  __ flush();
+  return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
+
+}
+/*
+// Helper function for native calling conventions
+static VMReg int_stk_helper( int i ) {
+  // Bias any stack based VMReg we get by ignoring the window area
+  // but not the register parameter save area.
+  //
+  // This is strange for the following reasons. We'd normally expect
+  // the calling convention to return an VMReg for a stack slot
+  // completely ignoring any abi reserved area. C2 thinks of that
+  // abi area as only out_preserve_stack_slots. This does not include
+  // the area allocated by the C abi to store down integer arguments
+  // because the java calling convention does not use it. So
+  // since c2 assumes that there are only out_preserve_stack_slots
+  // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
+  // location the c calling convention must add in this bias amount
+  // to make up for the fact that the out_preserve_stack_slots is
+  // insufficient for C calls. What a mess. I sure hope those 6
+  // stack words were worth it on every java call!
+
+  // Another way of cleaning this up would be for out_preserve_stack_slots
+  // to take a parameter to say whether it was C or java calling conventions.
+  // Then things might look a little better (but not much).
+
+  int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
+  if( mem_parm_offset < 0 ) {
+    return as_oRegister(i)->as_VMReg();
+  } else {
+    int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
+    // Now return a biased offset that will be correct when out_preserve_slots is added back in
+    return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
+  }
+}
+*/
+
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                         VMRegPair *regs,
+                                         int total_args_passed) {
+
+    // Return the number of VMReg stack_slots needed for the args.
+    // This value does not include an abi space (like register window
+    // save area).
+
+    // The native convention used here mirrors the MIPS o32 ABI: the first
+    // four argument words go in A0-A3 (leading floating-point arguments in
+    // F12/F14), and the remainder go on the stack.
+
+    // We return the amount of VMRegImpl stack slots we need to reserve for
+    // all the arguments NOT counting out_preserve_stack_slots.
+
+	uint    stack = 0;        // All arguments on stack
+	int     f2i = 0;
+	int     int_reg_max = 4; //A0,A1,A2,A3 
+	int     flt_reg_max = 2; //F12, F14 
+	int     int_reg_cnt = 0;
+	int     flt_reg_cnt = 0;
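+	// How the loop below assigns locations (illustrative, as implemented
+	// here in the spirit of the o32 convention): f2i is set once an
+	// integer-class argument has been seen, after which floating-point
+	// arguments use the integer registers / stack rather than F12/F14.
+	//   (jint, jfloat, jlong)  ->  A0, A1, A2:A3
+	//   (jdouble, jint)        ->  F12:F13, A2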
+	for( int i = 0; i < total_args_passed; i++) {
+		// From the type and the argument number (count) compute the location
+		switch( sig_bt[i] ) {
+			case T_BOOLEAN:
+			case T_CHAR:
+			case T_BYTE:
+			case T_SHORT:
+			case T_INT:
+			case T_OBJECT:
+			case T_ARRAY:
+			case T_ADDRESS:
+				f2i=1; 
+				if(int_reg_cnt == 0){
+					regs[i].set1(A0->as_VMReg());
+					int_reg_cnt++;
+				}
+				else if (int_reg_cnt == 1){
+					regs[i].set1(A1->as_VMReg());
+					int_reg_cnt++; 
+				}else if(int_reg_cnt == 2){
+					regs[i].set1(A2->as_VMReg());
+					int_reg_cnt++; 
+				}else if(int_reg_cnt == 3){
+					regs[i].set1(A3->as_VMReg());
+					int_reg_cnt++; 
+				}else{
+					regs[i].set1(VMRegImpl::stack2reg(stack++));
+				}
+				break;
+			case T_FLOAT:
+				if(f2i==1){
+					if (int_reg_cnt == 1){
+						regs[i].set1(A1->as_VMReg());
+						int_reg_cnt++; 
+					}else if(int_reg_cnt == 2){
+						regs[i].set1(A2->as_VMReg());
+						int_reg_cnt++; 
+					}else if(int_reg_cnt == 3){
+						regs[i].set1(A3->as_VMReg());
+						int_reg_cnt++; 
+					}else{
+						regs[i].set1(VMRegImpl::stack2reg(stack++));
+					}
+				}else{
+					if(flt_reg_cnt==0){
+						regs[i].set1(F12->as_VMReg());
+						flt_reg_cnt++;
+						int_reg_cnt++; 
+					}
+					else if( flt_reg_cnt==1){
+						regs[i].set1(F14->as_VMReg());
+						flt_reg_cnt++;
+						if(int_reg_cnt<=1)  int_reg_cnt++; 
+					}
+					else if(int_reg_cnt == 2){
+						regs[i].set1(A2->as_VMReg());
+						int_reg_cnt++; 
+					}else if(int_reg_cnt == 3){
+						regs[i].set1(A3->as_VMReg());
+						int_reg_cnt++; 
+					}else{
+						regs[i].set1(VMRegImpl::stack2reg(stack));
+						stack +=2; 
+					}
+
+				} 
+				break; 
+			case T_LONG:
+				f2i=1; 
+				if(int_reg_cnt == 0){
+					regs[i].set2(A0->as_VMReg());
+					int_reg_cnt += 2;  
+				}
+				else if (int_reg_cnt == 1){
+					regs[i].set2(A2->as_VMReg());
+					int_reg_cnt += 3;  
+				}else if(int_reg_cnt == 2){
+					regs[i].set2(A2->as_VMReg());
+					int_reg_cnt += 2;  
+				}else if(int_reg_cnt == 3){
+					regs[i].set2(VMRegImpl::stack2reg(stack));
+					stack += 2;
+					int_reg_cnt += 1;  
+				}else{
+					regs[i].set2(VMRegImpl::stack2reg(stack));
+					stack += 2;
+				}
+				break; 
+			case T_DOUBLE:      
+				assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
+				if(f2i==1){
+					if (int_reg_cnt == 1){
+						regs[i].set2(A2->as_VMReg());
+						int_reg_cnt += 3;  
+					}else if(int_reg_cnt == 2){
+						regs[i].set2(A2->as_VMReg());
+						int_reg_cnt += 2;  
+					}else if(int_reg_cnt == 3){
+						regs[i].set2(VMRegImpl::stack2reg(stack));
+						stack += 2;
+						int_reg_cnt += 1;  
+					}else{
+						regs[i].set2(VMRegImpl::stack2reg(stack));
+						stack += 2;
+					}
+				}
+				else {
+					if(flt_reg_cnt==0){
+						regs[i].set2(F12->as_VMReg());
+						flt_reg_cnt++;
+						int_reg_cnt += 2; 
+					}
+					else if( flt_reg_cnt==2){
+						regs[i].set2(F14->as_VMReg());
+						flt_reg_cnt++;
+						if(int_reg_cnt<=1)int_reg_cnt += 1; 
+					}
+					else if(int_reg_cnt == 2){
+						regs[i].set2(A2->as_VMReg());
+						int_reg_cnt +=2; 
+					}else if(int_reg_cnt == 3){
+						int_reg_cnt++; 
+						regs[i].set1(VMRegImpl::stack2reg(stack));
+						stack += 2;
+					}else{
+						regs[i].set1(VMRegImpl::stack2reg(stack));
+						stack += 2;
+					}
+
+				} 
+				break;
+			case T_VOID: regs[i].set_bad(); break;
+			default:
+				     ShouldNotReachHere();
+				     break;
+		}
+	}
+	return stack ;
+}
+
+int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, 
+                                         VMRegPair *regs,
+                                         int total_args_passed) {
+// We return the amount of VMRegImpl stack slots we need to reserve for all
+// the arguments NOT counting out_preserve_stack_slots. 
+  bool    unalign = false;
+  uint    stack = 0;        // All arguments on stack
+
+  for( int i = 0; i < total_args_passed; i++) {
+    // From the type and the argument number (count) compute the location
+    switch( sig_bt[i] ) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_FLOAT:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_ADDRESS:
+      regs[i].set1(VMRegImpl::stack2reg(stack++));
+      unalign = !unalign;
+      break;
+    case T_LONG:
+    case T_DOUBLE: // The stack numbering is reversed from Java
+      // Since C arguments do not get reversed, the ordering for
+      // doubles on the stack must be opposite the Java convention
+      assert(sig_bt[i+1] == T_VOID, "missing Half" ); 
+      if(unalign){
+            stack += 1; 
+     	    unalign = ! unalign; 
+      } 
+      regs[i].set2(VMRegImpl::stack2reg(stack));
+      stack += 2;
+      break;
+    case T_VOID: regs[i].set_bad(); break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+  return stack;
+}
+
+
+// ---------------------------------------------------------------------------
+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+	// We always ignore the frame_slots arg and just use the space just below
+	// the frame pointer, which by this time is free to use
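+	// Layout used here and in restore_native_result:
+	//   FP - wordSize    : V0 / FSF  (first result word)
+	//   FP - 2*wordSize  : V1 / SSF  (second word of long/double results)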
+	switch (ret_type) {
+		case T_FLOAT:
+			__ swc1(FSF, FP, -wordSize); 
+			break;
+		case T_DOUBLE:
+			__ swc1(FSF, FP, -wordSize ); 
+			__ swc1(SSF, FP, -2*wordSize ); 
+			break;
+		case T_VOID:  break;
+		case T_LONG:
+			      __ sw(V0, FP, -wordSize);
+			      __ sw(V1, FP, - 2*wordSize);
+			      break;
+		default: {
+				 __ sw(V0, FP, -wordSize);
+			 }
+	}
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+	// We always ignore the frame_slots arg and just use the space just below
+	// the frame pointer, which by this time is free to use
+	switch (ret_type) {
+		case T_FLOAT:
+			__ lwc1(FSF, FP, -wordSize); 
+			break;
+		case T_DOUBLE:
+			__ lwc1(FSF, FP, -wordSize ); 
+			__ lwc1(SSF, FP, -2*wordSize ); 
+			break;
+		case T_LONG:
+			__ lw(V0, FP, -wordSize);
+			__ lw(V1, FP, - 2*wordSize);
+			break;
+		case T_VOID:  break;
+		default: {
+				 __ lw(V0, FP, -wordSize);
+			 }
+	}
+}
+
+
+// A simple move of integer like type
+static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      //__ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+			__ lw(AT, FP, reg2offset_in(src.first())); 
+			__ sw (AT,SP, reg2offset_out(dst.first())); 
+    } else {
+      // stack to reg
+      //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+			__ lw( dst.first()->as_Register(),  FP, reg2offset_in(src.first())); 
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    //__ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+		__ sw( src.first()->as_Register(), SP, reg2offset_out(dst.first()));
+  } else {
+    //__ mov(src.first()->as_Register(), dst.first()->as_Register());
+		__ move(dst.first()->as_Register(), src.first()->as_Register());
+  }
+}
+/*
+// On 64 bit we will store integer like items to the stack as
+// 64 bits items (sparc abi) even though java would only store
+// 32bits for a parameter. On 32bit it will simply be 32 bits
+// So this routine will do 32->32 on 32bit and 32->64 on 64bit
+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+    } else {
+      // stack to reg
+      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+  } else {
+    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+  }
+}
+*/
+
+// An oop arg. Must pass a handle not the oop itself
+static void object_move(MacroAssembler* masm,
+                        OopMap* map,
+                        int oop_handle_offset,
+                        int framesize_in_slots,
+                        VMRegPair src,
+                        VMRegPair dst,
+                        bool is_receiver,
+                        int* receiver_offset) {
+
+  // must pass a handle. First figure out the location we use as a handle
+
+	// FIXME: for MIPS, dst can be a register
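+	// Handle protocol implemented below: a NULL oop is passed to the native
+	// code as a NULL handle; otherwise the handle is the address of the
+	// stack slot holding the oop, and that slot is recorded in the OopMap
+	// so GC can update it.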
+	if (src.first()->is_stack()) {
+		// Oop is already on the stack as an argument
+		Register rHandle = V0;
+		Label nil;
+		//__ xorl(rHandle, rHandle);
+		__ xorr(rHandle, rHandle, rHandle);
+		//__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD);
+		__ lw(AT, FP, reg2offset_in(src.first())); 
+		//__ jcc(Assembler::equal, nil);
+		__ beq(AT,ZERO, nil); 
+		__ delayed()->nop(); 
+		// __ leal(rHandle, Address(ebp, reg2offset_in(src.first())));
+		__ lea(rHandle, Address(FP, reg2offset_in(src.first())));
+		__ bind(nil);
+		//__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
+		if(dst.first()->is_stack())__ sw( rHandle, SP, reg2offset_out(dst.first()));
+		else                       __ move( (dst.first())->as_Register(),rHandle); 
+		//if dst is register 
+	// FIXME: does MIPS need out_preserve_stack_slots?
+		int offset_in_older_frame = src.first()->reg2stack() 
+			+ SharedRuntime::out_preserve_stack_slots();
+		map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
+		if (is_receiver) {
+			*receiver_offset = (offset_in_older_frame 
+					+ framesize_in_slots) * VMRegImpl::stack_slot_size;
+		}
+	} else {
+		// Oop is in a register; we must store it to the space we reserve
+		// on the stack for oop_handles
+		const Register rOop = src.first()->as_Register();
+		//   const Register rHandle = eax;
+		const Register rHandle = V0;
+		//  int oop_slot = (rOop == ecx ? 0 : 1) * VMRegImpl::slots_per_word 
+		//  + oop_handle_offset;
+// FIXME: refer to java_calling_convention
+		int oop_slot = (rOop == T0 ? 0 : 1) * VMRegImpl::slots_per_word + oop_handle_offset;
+		int offset = oop_slot*VMRegImpl::stack_slot_size;
+		Label skip;
+		// __ movl(Address(esp, offset), rOop);
+		__ sw( rOop , SP, offset );
+		map->set_oop(VMRegImpl::stack2reg(oop_slot));
+		//    __ xorl(rHandle, rHandle);
+		__ xorr( rHandle, rHandle, rHandle);
+		//__ cmpl(rOop, NULL_WORD);
+		// __ jcc(Assembler::equal, skip);
+		__ beq(rOop, ZERO, skip); 
+		__ delayed()->nop(); 
+		//  __ leal(rHandle, Address(esp, offset));
+		__ lea(rHandle, Address(SP, offset));
+		__ bind(skip);
+		// Store the handle parameter
+		//__ movl(Address(esp, reg2offset_out(dst.first())), rHandle);
+		if(dst.first()->is_stack())__ sw( rHandle, SP, reg2offset_out(dst.first()));
+		else                       __ move((dst.first())->as_Register(), rHandle); 
+		//if dst is register 
+
+		if (is_receiver) {
+			*receiver_offset = offset;
+		}
+	}
+}
+
+// A float arg may have to do float reg int reg conversion
+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
+
+	// Because of the calling convention we know that src is either a stack
+	// location or a float register; dst is normally a stack location.
+
+	assert(dst.first()->is_stack() && ( src.first()->is_stack() || 
+				src.first()->is_FloatRegister() ), "bad parameters");
+
+	if (src.first()->is_stack()) {
+		if(dst.first()->is_stack()){
+			//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
+			__ lwc1(F12 , FP, reg2offset_in(src.first()));
+			// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
+			__ swc1(F12 ,SP, reg2offset_out(dst.first()));
+		}	
+		else
+			__ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); 
+	} else {
+		// reg to stack
+		// __ movss(Address(esp, reg2offset_out(dst.first())), 
+		// src.first()->as_XMMRegister());
+		// __ movl(Address(esp, reg2offset_out(dst.first())), eax);
+		if(dst.first()->is_stack())
+			__ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
+		else
+			__ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); 
+	}
+}
+/*
+static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  VMRegPair src_lo(src.first());
+  VMRegPair src_hi(src.second());
+  VMRegPair dst_lo(dst.first());
+  VMRegPair dst_hi(dst.second());
+  simple_move32(masm, src_lo, dst_lo);
+  simple_move32(masm, src_hi, dst_hi);
+}
+*/
+// A long move
+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+
+	// The only legal possibility for a long_move VMRegPair is:
+	// 1: two stack slots (possibly unaligned)
+	// as neither the Java nor the C calling convention will use registers
+	// for longs.
+
+	if (src.first()->is_stack()) {
+		assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
+		//  __ movl(eax, Address(ebp, reg2offset_in(src.first())));
+		if( dst.first()->is_stack()){ 
+			__ lw(AT, FP, reg2offset_in(src.first()));
+			//  __ movl(ebx, address(ebp, reg2offset_in(src.second())));
+			__ lw(V0, FP, reg2offset_in(src.second())); 
+			// __ movl(address(esp, reg2offset_out(dst.first())), eax);
+			__ sw(AT, SP, reg2offset_out(dst.first()));
+			// __ movl(address(esp, reg2offset_out(dst.second())), ebx);
+			__ sw(V0, SP,  reg2offset_out(dst.second())); 
+		} else{
+			__ lw( (dst.first())->as_Register() , FP, reg2offset_in(src.first()));
+			__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second())); 
+		} 
+	} else {
+		if( dst.first()->is_stack()){ 
+			__ sw( (src.first())->as_Register(), SP, reg2offset_out(dst.first()));
+			__ sw( (src.second())->as_Register(), SP,  reg2offset_out(dst.second())); 
+		} else{
+			__ move( (dst.first())->as_Register() , (src.first())->as_Register());
+			__ move( (dst.second())->as_Register(), (src.second())->as_Register()); 
+		} 
+	}
+}
+
+// A double move
+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+
+	// The only legal possibilities for a double_move VMRegPair are:
+
+	// Because of the calling convention we know that src is either
+	//   1: a float register pair
+	//   2: two stack slots (possibly unaligned)
+	// dst is normally a pair of stack slots (a float register destination
+	// is handled below as well).
+
+	// assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || 
+	// src.first()->is_stack()), "bad args");
+	//  assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args");
+
+	if (src.first()->is_stack()) {
+		// source is all stack
+		// __ movl(eax, Address(ebp, reg2offset_in(src.first())));
+		if( dst.first()->is_stack()){ 
+			__ lwc1(F12, FP, reg2offset_in(src.first()));
+			//__ movl(ebx, Address(ebp, reg2offset_in(src.second())));
+			__ lwc1(F14, FP, reg2offset_in(src.second()));
+
+			//   __ movl(Address(esp, reg2offset_out(dst.first())), eax);
+			__ swc1(F12, SP, reg2offset_out(dst.first())); 
+			//  __ movl(Address(esp, reg2offset_out(dst.second())), ebx);
+			__ swc1(F14, SP, reg2offset_out(dst.second()));
+		} else{
+			__ lwc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first()));
+			__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second()));
+		} 
+
+	} else {
+		// reg to stack
+		// No worries about stack alignment
+		// __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
+		if( dst.first()->is_stack()){ 
+			__ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first()));
+			__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second()));
+		}
+		else {
+			__ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
+			__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister()); 
+		}
+
+	}
+}
+
+
+// ---------------------------------------------------------------------------
+// Generate a native wrapper for a given method.  The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// convention (handlizes oops, etc), transitions to native, makes the call,
+// returns to java state (possibly blocking), unhandlizes any result and
+// returns.
+nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                methodHandle method,
+                                                int total_in_args,
+                                                int comp_args_on_stack, // in VMRegStackSlots
+                                                BasicType *in_sig_bt,
+                                                VMRegPair *in_regs,
+                                                BasicType ret_type) {
+
+  // Native nmethod wrappers never take possession of the oop arguments.
+  // So the caller will gc the arguments. The only thing we need an
+  // oopMap for is if the call is static
+  //
+  // An OopMap for lock (and class if static), and one for the VM call itself
+  OopMapSet *oop_maps = new OopMapSet();
+
+	// We have received a description of where all the java arg are located
+	// on entry to the wrapper. We need to convert these args to where
+	// the jni function will expect them. To figure out where they go
+	// we convert the java signature to a C signature by inserting
+	// the hidden arguments as arg[0] and possibly arg[1] (static method)
+//jerome_for_debug
+//  ResourceMark rm;
+ // printf("******native method %s \n", method->method_holder()->klass_part()->internal_name());
+	int total_c_args = total_in_args + 1;
+	if (method->is_static()) {
+		total_c_args++;
+	}
+
+	BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
+	VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+
+	int argc = 0;
+	out_sig_bt[argc++] = T_ADDRESS;
+	if (method->is_static()) {
+		out_sig_bt[argc++] = T_OBJECT;
+	}
+
+  for (int i = 0; i < total_in_args ; i++ ) {
+    out_sig_bt[argc++] = in_sig_bt[i];
+  }
+
+  // Now figure out where the args must be stored and how much stack space
+  // they require (neglecting out_preserve_stack_slots but including space
+  // for storing the 1st six register arguments). It's weird; see int_stk_helper.
+  //
+  int out_arg_slots;
+  //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
+	out_arg_slots = c_calling_convention_jni(out_sig_bt, out_regs, total_c_args);
+
+  // Compute framesize for the wrapper.  We need to handlize all oops in
+  // registers. We must create space for them here that is disjoint from
+  // the windowed save area because we have no control over when we might
+  // flush the window again and overwrite values that gc has since modified.
+  // (The live window race)
+  //
+  // We always just allocate 6 words for storing down these objects. This allows
+  // us to simply record the base and use the Ireg number to decide which
+  // slot to use. (Note that the reg number is the inbound number not the
+  // outbound number).
+  // We must shuffle args to match the native convention, and include var-args space.
+
+  // Calculate the total number of stack slots we will need.
+
+  // First count the abi requirement plus all of the outgoing args
+  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
+
+  // Now the space for the inbound oop handle area
+
+  int oop_handle_offset = stack_slots;
+  //stack_slots += 6*VMRegImpl::slots_per_word;
+  stack_slots += 5*VMRegImpl::slots_per_word;
+
+  // Now any space we need for handlizing a klass if static method
+
+	int klass_slot_offset = 0;
+	int klass_offset = -1;
+	int lock_slot_offset = 0;
+	bool is_static = false;
+	int oop_temp_slot_offset = 0;
+/*
+	if (jvmpi::is_event_enabled(JVMPI_EVENT_METHOD_EXIT)) {
+		oop_temp_slot_offset = stack_slots;
+		stack_slots += VMRegImpl::slots_per_word;
+	}
+*/
+  if (method->is_static()) {
+    klass_slot_offset = stack_slots;
+    stack_slots += VMRegImpl::slots_per_word;
+    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
+    is_static = true;
+  }
+
+  // Plus a lock if needed
+
+  if (method->is_synchronized()) {
+    lock_slot_offset = stack_slots;
+    stack_slots += VMRegImpl::slots_per_word;
+  }
+
+  // Now a place to save return value or as a temporary for any gpr -> fpr moves
+  // + 4 slots here (the x86 original added 2: return address and saved ebp)
+  //stack_slots += 2;
+  stack_slots += 4;
+
+  // Ok The space we have allocated will look like:
+  //
+  //
+  // FP-> |                     |
+  //      |---------------------|
+  //      | 2 slots for moves   |
+  //      |---------------------|
+  //      | lock box (if sync)  |
+  //      |---------------------| <- lock_slot_offset
+  //      | klass (if static)   |
+  //      |---------------------| <- klass_slot_offset
+  //      | oopHandle area      |
+  //      |---------------------| <- oop_handle_offset
+  //      | outbound memory     |
+  //      | based arguments     |
+  //      |                     |
+  //      |---------------------|
+  //      | vararg area         |
+  //      |---------------------|
+  //      |                     |
+  // SP-> | out_preserved_slots |
+  //
+  //
+
+
+  // Now compute actual number of stack words we need rounding to make
+  // stack properly aligned.
+  stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
+
+  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
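+  // Worked example (illustrative, assuming a 32-bit build where
+  // VMRegImpl::slots_per_word == 1 and, hypothetically,
+  // out_preserve_stack_slots() == 0): a synchronized non-static native with
+  // out_arg_slots == 6 needs 6 + 5 (oop handles) + 1 (lock) + 4 = 16 slots,
+  // already 2-aligned, i.e. a 64-byte wrapper frame.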
+
+	intptr_t start = (intptr_t)__ pc();
+
+
+
+	// First thing make an ic check to see if we should even be here
+	address ic_miss = SharedRuntime::get_ic_miss_stub();
+
+	// We are free to use all registers as temps without saving them and
+	// restoring them except ebp. ebp is the only callee save register
+	// as far as the interpreter and the compiler(s) are concerned.
+
+        //refer to register_mips.hpp:IC_Klass
+	//  const Register ic_reg = eax;
+	const Register ic_reg = T1;
+	//  const Register receiver = ecx;
+	const Register receiver = T0;
+	Label hit;
+	Label exception_pending;
+
+	__ verify_oop(receiver);
+	// __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
+	__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); 
+	//__ jcc(Assembler::equal, hit);
+	__ beq(AT, ic_reg, hit); 
+	__ delayed()->nop(); 
+	__ jmp(ic_miss, relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	// verified entry must be aligned for code patching.
+	// and the first 5 bytes must be in the same cache line;
+	// if we align at 8 then we can be sure the 5 bytes are in the same line
+	__ align(8);
+
+	__ bind(hit);
+
+
+	int vep_offset = ((intptr_t)__ pc()) - start;
+	
+#ifdef COMPILER1
+	if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
+		// Object.hashCode can pull the hashCode from the header word
+		// instead of doing a full VM transition once it's been computed.
+		// Since hashCode is usually polymorphic at call sites we can't do
+		// this optimization at the call site without a lot of work.
+		Label slowCase;
+		// Register receiver = ecx;
+		Register receiver = T0;
+		//Register result = eax;
+		Register result = V0;
+		__ lw ( result, receiver, oopDesc::mark_offset_in_bytes()); 
+		// check if locked
+		__ andi(AT, result, markOopDesc::unlocked_value); 
+		__ beq(AT, ZERO, slowCase); 
+		__ delayed()->nop(); 
+		if (UseBiasedLocking) {
+			// Check if biased and fall through to runtime if so
+			__ andi (AT, result, markOopDesc::biased_lock_bit_in_place);	  
+			__ bne(AT,ZERO, slowCase); 
+			__ delayed()->nop(); 
+		}
+		// get hash
+		__ andi (result, result,markOopDesc::hash_mask_in_place);
+		// test if hashCode exists
+		__ beq (result, ZERO, slowCase); 
+		__ delayed()->nop(); 
+		__ shr(result, markOopDesc::hash_shift);
+		__ jr(RA); 
+		__ delayed()->nop(); 
+		__ bind (slowCase);
+	}
+#endif // COMPILER1
+
+	// The instruction at the verified entry point must be 5 bytes or longer
+	// because it can be patched on the fly by make_non_entrant. The stack bang
+	// instruction fits that requirement. 
+
+	// Generate stack overflow check
+
+	if (UseStackBanging) {
+	//this function will modify the value in A0	
+		__ push(A0);
+		__ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
+		__ pop(A0);
+	} else {
+		// need a 5 byte instruction to allow MT safe patching to non-entrant
+		__ nop(); 
+		__ nop(); 
+		__ nop(); 
+		__ nop(); 
+		__ nop(); 
+	}
+	// Generate a new frame for the wrapper.
+	// does MIPS need this?
+	__ get_thread(T8);	
+//FIXME here
+	__ sw(SP, T8, in_bytes(JavaThread::last_Java_sp_offset()));
+	// -2 because return address is already present and so is saved ebp
+	__ move(AT, -8);
+	__ andr(SP, SP, AT);
+
+	__ enter();
+	__ move(AT, stack_size - 2*wordSize);
+	__ sub(SP, SP, AT);
+
+	// Frame is now completed as far a size and linkage.
+
+	int frame_complete = ((intptr_t)__ pc()) - start;
+
+	// Calculate the difference between esp and ebp. We need to know it
+	// after the native call because on windows Java Natives will pop
+	// the arguments and it is painful to do esp relative addressing
+	// in a platform independent way. So after the call we switch to
+	// ebp relative addressing.
+//FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change
+//the SP 
+	int fp_adjustment = stack_size - 2*wordSize;
+
+#ifdef COMPILER2
+	// C2 may leave the stack dirty if not in SSE2+ mode
+	// if (UseSSE >= 2) {
+	//  __ verify_FPU(0, "c2i transition should have clean FPU stack");
+	//} else {
+	__ empty_FPU_stack();
+	//}
+#endif /* COMPILER2 */
+
+	// Compute the ebp offset for any slots used after the jni call
+
+	int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
+	int oop_temp_slot_ebp_offset = (oop_temp_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
+	// We use edi as a thread pointer because it is callee save and
+	// if we load it once it is usable thru the entire wrapper
+	// const Register thread = edi;
+	const Register thread = TREG;
+
+	// We use esi as the oop handle for the receiver/klass
+	// It is callee save so it survives the call to native
+
+	// const Register oop_handle_reg = esi;
+	const Register oop_handle_reg = S5;
+
+	__ get_thread(thread);
+
+
+  //
+  // We immediately shuffle the arguments so that any vm call we have to
+  // make from here on out (sync slow path, jvmpi, etc.) we will have
+  // captured the oops from our caller and have a valid oopMap for
+  // them.
+
+  // -----------------
+  // The Grand Shuffle 
+  //
+  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
+  // and, if static, the class mirror instead of a receiver.  This pretty much
+  // guarantees that register layout will not match (and x86 doesn't use reg
+  // parms though amd does).  Since the native abi doesn't use register args
+  // and the java convention does, we don't have to worry about collisions.
+  // All of our moves are reg->stack or stack->stack.
+  // We ignore the extra arguments during the shuffle and handle them at the
+  // last moment. The shuffle is described by the two calling convention
+  // vectors we have in our possession. We simply walk the java vector to
+  // get the source locations and the c vector to get the destinations.
+
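+  // Example of the shuffle (illustrative): for an instance method
+  //   native int foo(long l, Object o)
+  // the java vector describes (this, l, o) in the compiled Java layout,
+  // while the c vector describes (JNIEnv*, jobject this, jlong l, jobject o)
+  // as laid out by c_calling_convention_jni, so c_arg starts at 1
+  // (2 for a static method, where a class mirror handle is inserted).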
+	int c_arg = method->is_static() ? 2 : 1 ;
+
+	// Record esp-based slot for receiver on stack for non-static methods
+	int receiver_offset = -1;
+
+	// This is a trick. We double the stack slots so we can claim
+	// the oops in the caller's frame. Since we are sure to have
+	// more args than the caller doubling is enough to make
+	// sure we can capture all the incoming oop args from the
+	// caller. 
+	//
+	OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+	// Mark location of ebp
+	// map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, 
+	// ebp->as_VMReg());
+
+	// We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx
+	// are free as temporaries if we have to do stack to stack moves.
+	// All inbound args are referenced based on ebp and all outbound args via esp.
+
+	for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
+		switch (in_sig_bt[i]) {
+			case T_ARRAY:
+			case T_OBJECT:
+			//object_move will modify the value in T4	
+				object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], 
+				out_regs[c_arg],((i == 0) && (!is_static)),&receiver_offset);
+				break;
+			case T_VOID:
+				break;
+
+			case T_FLOAT:
+				float_move(masm, in_regs[i], out_regs[c_arg]);
+				break;
+
+			case T_DOUBLE:
+				assert( i + 1 < total_in_args && 
+						in_sig_bt[i + 1] == T_VOID &&
+						out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
+				double_move(masm, in_regs[i], out_regs[c_arg]);
+				break;
+
+			case T_LONG :
+				long_move(masm, in_regs[i], out_regs[c_arg]);
+				break;
+
+			case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
+
+			default:
+				simple_move32(masm, in_regs[i], out_regs[c_arg]);
+		}
+	}
+
+	// Pre-load a static method's oop into esi.  Used both by locking code and
+	// the normal JNI call code.
+	if (method->is_static()) {
+
+		//  load oop into a register
+		// __ movl(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
+		int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local(
+					Klass::cast(method->method_holder())->java_mirror()));
+
+		//printf("oop_index =0x%x \n", oop_index);		
+		
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local(
+			Klass::cast(method->method_holder())->java_mirror())));
+		__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int)
+		    JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror())));
+		
+	//	__ verify_oop(oop_handle_reg);
+		// Now handlize the static class mirror it's known not-null.
+		// __ movl(Address(esp, klass_offset), oop_handle_reg);
+		__ sw( oop_handle_reg, SP, klass_offset); 
+		map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
+		
+		// Now get the handle
+		__ lea(oop_handle_reg, Address(SP, klass_offset));
+		// store the klass handle as second argument
+		__ sw( oop_handle_reg, SP, wordSize); 
+	}
+/*aoqi:FIXME
+	if (jvmpi::is_event_enabled(JVMPI_EVENT_METHOD_EXIT)) {
+	//__ movl(Address(esp, oop_temp_slot_offset * VMRegImpl::stack_slot_size), NULL_WORD);
+		__ sw(ZERO, SP, oop_temp_slot_offset * VMRegImpl::stack_slot_size); 
+		map->set_oop(VMRegImpl::stack2reg(oop_temp_slot_offset));
+	}
+*/
+  // Change state to native (we save the return address in the thread, since it might not
+  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
+  // points into the right code segment. It does not have to be the correct return pc.
+  // We use the same pc/oopMap repeatedly when we call out
+
+	intptr_t the_pc = (intptr_t) __ pc();
+	
+  //printf("native blob start = 0x%x, offset=0x%x \n", (int)start, (int)the_pc); 
+	oop_maps->add_gc_map(the_pc - start, map);
+
+	//__ set_last_Java_frame(thread, esp, noreg, (address)the_pc);
+//jerome_for_debug	
+	__ set_last_Java_frame(thread, SP, noreg, NULL);
+	__ relocate(relocInfo::internal_pc_type); 
+	{	
+	//	int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+		int save_pc = (int)the_pc ;
+		__ lui(AT, Assembler::split_high(save_pc));
+		__ addiu(AT, AT, Assembler::split_low(save_pc));
+	}
+        __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+ 
+
+	// We have all of the arguments setup at this point. We must not touch any register
+	// argument registers at this point (what if we save/restore them there are no oop?
+/*aoqi:FIXME
+	// jvmpi support
+	if (jvmpi::is_event_enabled(JVMPI_EVENT_METHOD_ENTRY) ||
+			jvmpi::is_event_enabled(JVMPI_EVENT_METHOD_ENTRY2)) {
+
+	       //  __ push(A1);	
+	        // __ push(A2);	
+	        // __ push(A3);	
+		// push the receiver
+		
+
+		//__ movl(eax, JNIHandles::make_local(method()));
+    //__ move(A1, JNIHandles::make_local(method()));
+		int oop_index = __ oop_recorder()->find_index( JNIHandles::make_local(method()));
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(A1, Assembler::split_high((int)JNIHandles::make_local(method())));
+		__ addiu(A1, A1, Assembler::split_low((int)JNIHandles::make_local(method())));
+		if (method()->is_static()) {
+			//   __ pushl((int) NULL_WORD);
+			__ move (A2 , ZERO); 
+		} else {
+			// __ pushl(Address(esp, receiver_offset));
+			__ lw(A2 , SP , receiver_offset); 
+		}
+
+		//__ pushl(eax);
+
+		//__ pushl(thread);
+		__ move(A0, thread);
+		__ addi(SP,SP, -3*wordSize);
+		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::jvmpi_method_entry), 
+				relocInfo::runtime_call_type);
+		__ delayed()->nop(); 
+		//  __ addl(esp, 3*wordSize);
+		__ addi(SP,SP, 3*wordSize);
+	//	__ pop(A3);	
+	//	__ pop(A2);	
+	//	__ pop(A1);	
+		// Any exception pending?
+		// __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
+		__ lw(AT, thread, in_bytes(Thread::pending_exception_offset())); 
+		//__ jcc(Assembler::notEqual, exception_pending);
+		__ bne(AT, ZERO, exception_pending); 
+		__ delayed()->nop(); 
+	}
+*/
+	{ 
+		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
+		//  __ movl(eax, JNIHandles::make_local(method()));
+		int oop_index = __ oop_recorder()->find_index( JNIHandles::make_local(method()));
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
+		__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
+
+
+	 //        __ push(A1);	
+	  //       __ push(A2);	
+	   //      __ push(A3);	
+
+		__ call_VM_leaf(
+				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 
+				//   thread, eax);
+		   thread, T6); 
+	//	__ pop(A3);	
+	//	__ pop(A2);	
+	//	__ pop(A1);	
+
+	}
+
+
+  // These are register definitions we need for locking/unlocking 
+//  const Register swap_reg = eax;  // Must use eax for cmpxchg instruction
+//  const Register obj_reg  = ecx;  // Will contain the oop
+ // const Register lock_reg = edx;  // Address of compiler lock object (BasicLock)
+//FIXME, I have no idea which register to use
+	const Register swap_reg = T8;  // Must use eax for cmpxchg instruction
+	const Register obj_reg  = V0;  // Will contain the oop
+	const Register lock_reg = T6;  // Address of compiler lock object (BasicLock)
+
+
+
+	Label slow_path_lock;
+	Label lock_done;
+
+	// Lock a synchronized method
+	if (method->is_synchronized()) {
+
+
+		const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
+
+		// Get the handle (the 2nd argument)
+		//__ movl(oop_handle_reg, Address(esp, wordSize));
+		__ lw( oop_handle_reg, SP, wordSize);  
+		// Get address of the box
+
+		__ lea(lock_reg, Address(FP, lock_slot_ebp_offset));
+
+		// Load the oop from the handle 
+		__ lw(obj_reg, oop_handle_reg, 0);
+
+		if (UseBiasedLocking) {
+			// Note that oop_handle_reg is trashed during this call
+		__ biased_locking_enter(lock_reg, obj_reg, swap_reg, oop_handle_reg, 
+				false, lock_done, &slow_path_lock);
+		}
+
+		// Load immediate 1 into swap_reg %eax
+		__ move(swap_reg, 1);
+
+		__ lw(AT, obj_reg, 0);   
+		__ orr(swap_reg, swap_reg, AT); 
+
+		__ sw( swap_reg, lock_reg, mark_word_offset);
+		__ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg);
+		__ bne(AT, ZERO, lock_done);
+		__ delayed()->nop(); 
+		// Test if the oopMark is an obvious stack pointer, i.e.,
+		//  1) (mark & 3) == 0, and
+		//  2) esp <= mark < esp + os::pagesize()
+		// These 3 tests can be done by evaluating the following
+		// expression: ((mark - esp) & (3 - os::vm_page_size())),
+		// assuming both stack pointer and pagesize have their
+		// least significant 2 bits clear.
+		// NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg
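+		// Illustrative: with a 4096-byte page, 3 - os::vm_page_size() is
+		// 0xfffff003, so the andr below leaves zero exactly when
+		// (mark - SP) has its low two bits clear and lies in [0, 4096),
+		// i.e. the displaced mark is a lock in our own frame (recursive case).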
+
+		__ sub(swap_reg, swap_reg,SP);
+ 		__ move(AT, 3 - os::vm_page_size());
+		__ andr(swap_reg , swap_reg, AT);
+		// Save the test result, for recursive case, the result is zero
+		__ sw(swap_reg, lock_reg, mark_word_offset); 
+	//FIXME here, Why notEqual? 	
+		__ bne(swap_reg,ZERO, slow_path_lock);
+		__ delayed()->nop();  
+		// Slow path will re-enter here
+		__ bind(lock_done);
+
+		if (UseBiasedLocking) {
+			// Re-fetch oop_handle_reg as we trashed it above
+			//  __ movl(oop_handle_reg, Address(esp, wordSize));
+			__ lw(oop_handle_reg, SP, wordSize);
+		}
+	}
+
+
+	// Finally just about ready to make the JNI call
+
+
+	// get JNIEnv* which is first argument to native
+
+	__ addi(AT, thread, in_bytes(JavaThread::jni_environment_offset()));
+	__ sw(AT, SP, 0);	
+
+	__ lw(A0, SP,  0 );
+	__ lw(A1, SP , 4 ); 
+	__ lw(A2, SP , 8 ); 
+	__ lw(A3, SP , 12 ); 
+
+
+	// Now set thread in native
+	__ addi(AT, ZERO, _thread_in_native); 
+	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); 
+	__ call(method->native_function(), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	// WARNING - on Windows Java Natives use pascal calling convention and pop the
+	// arguments off of the stack. We could just re-adjust the stack pointer here
+	// and continue to do SP relative addressing but we instead switch to FP
+	// relative addressing.
+
+	// Unpack native results.  
+	switch (ret_type) {
+	case T_BOOLEAN: __ c2bool(V0);            break;
+	case T_CHAR   : __ andi(V0,V0, 0xFFFF);      break;
+	case T_BYTE   : __ sign_extend_byte (V0); break;
+	case T_SHORT  : __ sign_extend_short(V0); break;
+	case T_INT    : /* nothing to do */       break;
+	case T_DOUBLE :
+	case T_FLOAT  :
+	// Result is in the float result register (st0 on x86); we'll save as needed
+	break;
+	case T_ARRAY:                 // Really a handle
+	case T_OBJECT:                // Really a handle
+	break; // can't de-handlize until after safepoint check
+	case T_VOID: break;
+	case T_LONG: break;
+	default       : ShouldNotReachHere();
+	}
+	// Switch thread to "native transition" state before reading the synchronization state.
+	// This additional state is necessary because reading and testing the synchronization
+	// state is not atomic w.r.t. GC, as this scenario demonstrates:
+	//     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+	//     VM thread changes sync state to synchronizing and suspends threads for GC.
+	//     Thread A is resumed to finish this native method, but doesn't block here since it
+	//     didn't see any synchronization in progress, and escapes.
+	// __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);    
+	//__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset());    
+	//   __ move(AT, (int)_thread_in_native_trans);
+	__ addi(AT, ZERO, _thread_in_native_trans); 
+	__ sw(AT, thread, in_bytes(JavaThread::thread_state_offset()));    
+	
+	//  if(os::is_MP()) { 
+	//   if (UseMembar) {
+	//    __ membar(); // Force this write out before the read below
+	// } else {
+	// Write serialization page so VM thread can do a pseudo remote membar.
+	// We use the current thread pointer to calculate a thread specific
+	// offset to write to within the page. This minimizes bus traffic
+	// due to cache line collision.
+	// __ serialize_memory(thread, ecx);
+	//__ serialize_memory(thread, V0);
+	// }
+	//}
+
+
+	// check for safepoint operation in progress and/or pending suspend requests
+	{ Label Continue;
+//FIXME here, which register should we use?
+		//        SafepointSynchronize::_not_synchronized);
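+		// The branches below implement, in effect:
+		//   if (SafepointSynchronize::state != _not_synchronized ||
+		//       thread->suspend_flags != 0)
+		//     JavaThread::check_special_condition_for_native_trans(thread);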
+		__ move(AT, (int)SafepointSynchronize::address_of_state());
+		__ lw(A0, AT, 0);	
+		__ addi(AT, A0, -SafepointSynchronize::_not_synchronized); 
+		Label L;
+		__ bne(AT,ZERO, L); 
+		__ delayed()->nop();	
+		__ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); 
+		__ beq(AT, ZERO, Continue); 
+		__ delayed()->nop(); 
+		__ bind(L);
+
+		// Don't use call_VM as it will see a possible pending exception and forward it
+		// and never return here preventing us from clearing _last_native_pc down below.
+		// Also can't use call_VM_leaf either as it will check to see if esi & edi are
+		// preserved and correspond to the bcp/locals pointers. So we do a runtime call
+		// by hand.
+		//
+		save_native_result(masm, ret_type, stack_slots);
+		__ move (A0, thread); 
+		__ addi(SP,SP, -wordSize); 
+		__ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type);
+		__ delayed()->nop(); 
+		__ addi(SP,SP, wordSize); 
+		// Restore any method result value
+		restore_native_result(masm, ret_type, stack_slots);
+
+		__ bind(Continue);
+	}
+
+	// change thread state
+	__ addi(AT, ZERO, _thread_in_Java); 
+	__ sw(AT,  thread, in_bytes(JavaThread::thread_state_offset())); 
+	Label reguard;
+	Label reguard_done;
+	__ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); 
+	__ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); 
+	__ beq(AT, ZERO, reguard);
+	__ delayed()->nop();  
+	// slow path reguard  re-enters here
+	__ bind(reguard_done);
+
+	// Handle possible exception (will unlock if necessary)
+
+	// native result if any is live 
+
+	// Unlock
+	Label slow_path_unlock;
+	Label unlock_done;
+	if (method->is_synchronized()) {
+
+		Label done;
+
+		// Get locked oop from the handle we passed to jni
+		__ lw( obj_reg, oop_handle_reg, 0);
+		//FIXME 
+		if (UseBiasedLocking) {
+			__ biased_locking_exit(obj_reg, T8, done);
+
+		}
+
+		// Simple recursive lock?
+
+		__ lw(AT, FP, lock_slot_ebp_offset); 
+		__ beq(AT, ZERO, done);
+		__ delayed()->nop();	
+		// Must save eax if it is live now because cmpxchg must use it
+		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+			save_native_result(masm, ret_type, stack_slots);
+		}
+
+		//  get old displaced header
+		__ lw (T8, FP, lock_slot_ebp_offset);
+		// get address of the stack lock
+		__ addi (T6, FP, lock_slot_ebp_offset);
+		// Atomic swap old header if oop still contains the stack lock
+		__ cmpxchg(T8, Address(obj_reg, 0),T6 );
+
+		__ beq(AT, ZERO, slow_path_unlock);
+		__ delayed()->nop(); 
+		// slow path re-enters here
+		__ bind(unlock_done);
+		if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+			restore_native_result(masm, ret_type, stack_slots);
+		}
+
+		__ bind(done);
+
+	}
+/*aoqi:FIXME
+	// Tell jvmpi about this method exit
+	if (jvmpi::is_event_enabled(JVMPI_EVENT_METHOD_EXIT)) {
+		save_native_result(masm, ret_type, stack_slots);
+		// Save any pending exception and clear it from the thread
+		//  __ movl(eax, Address(thread, in_bytes(Thread::pending_exception_offset())));
+		__ lw(T6, thread, in_bytes(Thread::pending_exception_offset()));
+		//__ movl(Address(ebp, oop_temp_slot_ebp_offset), eax);
+		__ sw(T6, FP, oop_temp_slot_ebp_offset); 
+		//__ movl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
+		__ sw(ZERO, thread, in_bytes(Thread::pending_exception_offset())); 
+		//__ pushl(JNIHandles::make_local(method()));
+		//__ pushl(thread);
+		//__ move(A1, JNIHandles::make_local(method())); 
+		int oop_index = __ oop_recorder()->find_index( JNIHandles::make_local(method()));
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(A1, Assembler::split_high((int)JNIHandles::make_local(method())));
+		__ addiu(A1, A1, Assembler::split_low((int)JNIHandles::make_local(method())));
+		__ move(A0, thread); 
+		__ addi(SP, SP , - 2*wordSize); 
+		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::jvmpi_method_exit), relocInfo::runtime_call_type);
+		__ delayed()->nop(); 
+		__ addi(SP, SP ,  2*wordSize); 
+		//__ addl(esp, 2*wordSize);
+		Label L;
+		// If we had a pending exception before jvmpi call it takes precedence
+		//    __ movl(eax, Address(ebp, oop_temp_slot_ebp_offset));
+		__ lw(T6, FP, oop_temp_slot_ebp_offset); 
+		// __ testl(eax, eax);
+		//__ jcc(Assembler::equal, L);
+		__ beq(T6, ZERO, L); 
+		__ delayed()->nop(); 
+		//   __ movl(Address(thread, in_bytes(Thread::pending_exception_offset())), eax);
+	    //    __ move(AT, (int)&jerome2);	
+	//	__ sw(T6, AT, 0);	
+		__ sw(T6, thread, in_bytes(Thread::pending_exception_offset())); 
+		__ bind(L);
+		restore_native_result(masm, ret_type, stack_slots);
+	}
+*/
+	{ 
+		SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
+		// Tell dtrace about this method exit
+		save_native_result(masm, ret_type, stack_slots);
+		int oop_index = __ oop_recorder()->find_index( JNIHandles::make_local(method()));
+		RelocationHolder rspec = oop_Relocation::spec(oop_index);
+		__ relocate(rspec);
+		__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
+		__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
+
+
+		__ call_VM_leaf(
+				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 
+				thread, T6);
+		restore_native_result(masm, ret_type, stack_slots);
+	}
+
+	// We can finally stop using that last_Java_frame we setup ages ago
+
+	__ reset_last_Java_frame(thread, false, true);
+
+	// Unpack oop result
+	if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+		Label L;
+		//  __ cmpl(eax, NULL_WORD);
+		//  __ jcc(Assembler::equal, L);
+		__ beq(V0, ZERO,L ); 
+		__ delayed()->nop(); 
+		//  __ movl(eax, Address(eax));
+		__ lw(V0, V0, 0);	
+		__ bind(L);
+		// __ verify_oop(eax);
+		__ verify_oop(V0);
+  }
+
+	// reset handle block
+	__ lw(AT, thread, in_bytes(JavaThread::active_handles_offset()));
+	__ sw(ZERO, AT, JNIHandleBlock::top_offset_in_bytes()); 
+	// Any exception pending?
+	__ lw(AT, thread, in_bytes(Thread::pending_exception_offset())); 
+
+	__ bne(AT, ZERO, exception_pending);
+	__ delayed()->nop();
+	// no exception, we're almost done
+
+	// check that only result value is on FPU stack
+	__ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
+
+  // Fixup floating point results so that the result looks like a return from a compiled method
+/*  if (ret_type == T_FLOAT) {
+    if (UseSSE >= 1) {
+      // Pop st0 and store as float and reload into xmm register
+      __ fstp_s(Address(ebp, -4));
+      __ movss(xmm0, Address(ebp, -4));
+    }
+  } else if (ret_type == T_DOUBLE) {
+    if (UseSSE >= 2) {
+      // Pop st0 and store as double and reload into xmm register
+      __ fstp_d(Address(ebp, -8));
+      __ movsd(xmm0, Address(ebp, -8));
+    }
+  }
+*/
+  // Return
+	__ get_thread(T8);	
+	__ lw(SP,T8,in_bytes(JavaThread::last_Java_sp_offset()));
+	__ leave();
+
+	__ jr(RA);
+	__ delayed()->nop(); 
+	// Unexpected paths are out of line and go here
+
+	// Slow path locking & unlocking
+	if (method->is_synchronized()) {
+
+		// BEGIN Slow path lock
+
+		__ bind(slow_path_lock);
+
+		// has last_Java_frame setup. No exceptions so do vanilla call not call_VM
+		// args are (oop obj, BasicLock* lock, JavaThread* thread)
+		
+		__ move(A0, obj_reg); 
+		__ move(A1, lock_reg); 
+		__ move(A2, thread); 
+		__ addi(SP, SP, - 3*wordSize); 
+		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
+		__ delayed()->nop();
+		__ addi(SP, SP, 3*wordSize); 
+#ifdef ASSERT
+		{ Label L;
+			// __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
+			__ lw(AT, thread, in_bytes(Thread::pending_exception_offset())); 
+			//__ jcc(Assembler::equal, L);
+			__ beq(AT, ZERO, L); 
+			__ delayed()->nop(); 
+			__ stop("no pending exception allowed on exit from monitorenter");
+			__ bind(L);
+		}
+#endif
+		__ b(lock_done);
+		__ delayed()->nop();
+		// END Slow path lock
+
+		// BEGIN Slow path unlock
+		__ bind(slow_path_unlock);
+
+		// Slow path unlock
+
+		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
+			save_native_result(masm, ret_type, stack_slots);
+		}
+		// Save pending exception around call to VM (which contains an EXCEPTION_MARK)
+
+		__ lw(AT, thread, in_bytes(Thread::pending_exception_offset())); 
+		__ push(AT); 
+		__ sw(ZERO, thread, in_bytes(Thread::pending_exception_offset()));
+
+		// should be a lea (load effective address of the lock slot)
+		// +wordSize because of the push above
+		__ addi(A1, FP, lock_slot_ebp_offset);
+
+		__ move(A0, obj_reg); 
+		__ addi(SP,SP, -2*wordSize);
+		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
+				relocInfo::runtime_call_type);
+		__ delayed()->nop(); 
+		__ addi(SP,SP, 2*wordSize);
+#ifdef ASSERT
+		{
+			Label L;
+			//    __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
+			__ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); 
+			//__ jcc(Assembler::equal, L);
+			__ beq(AT, ZERO, L); 
+			__ delayed()->nop(); 
+			__ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
+			__ bind(L);
+		}
+#endif /* ASSERT */
+
+		__ pop(AT); 
+		__ sw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+		if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
+			restore_native_result(masm, ret_type, stack_slots);
+		}
+		__ b(unlock_done);
+		__ delayed()->nop(); 
+		// END Slow path unlock
+
+	}
+
+	// SLOW PATH Reguard the stack if needed
+
+	__ bind(reguard);
+	save_native_result(masm, ret_type, stack_slots);
+	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 
+			relocInfo::runtime_call_type);
+	__ delayed()->nop();	
+	restore_native_result(masm, ret_type, stack_slots);
+	__ b(reguard_done);
+	__ delayed()->nop();
+
+	// BEGIN EXCEPTION PROCESSING
+
+	// Forward  the exception
+	__ bind(exception_pending);
+
+	// remove possible return value from FPU register stack
+	__ empty_FPU_stack();
+
+	// pop our frame
+	// forward_exception_entry needs the return address on the stack
+	__ addiu(SP, FP, wordSize);
+	__ lw(FP, SP, (-1) * wordSize);
+
+	// and forward the exception
+	__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	__ flush();
+
+	nmethod *nm = nmethod::new_native_nmethod(method,
+			masm->code(),
+			vep_offset,
+			frame_complete,
+			stack_slots / VMRegImpl::slots_per_word,
+			(is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
+			in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
+			oop_maps);
+	return nm;
+
+
+}
+
+#ifdef HAVE_DTRACE_H
+// ---------------------------------------------------------------------------
+// Generate a dtrace nmethod for a given signature.  The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// abi and then leaves nops at the position you would expect to call a native
+// function. When the probe is enabled the nops are replaced with a trap
+// instruction that dtrace inserts and the trap will cause a notification
+// to dtrace.
+//
+// The probes are only able to take primitive types and java/lang/String as
+// arguments.  No other java types are allowed. Strings are converted to utf8
+// strings so that from dtrace point of view java strings are converted to C
+// strings. There is an arbitrary fixed limit on the total space that a method
+// can use for converting the strings. (256 chars per string in the signature).
+// So any java string larger than this is truncated.
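+//
+// Illustrative example (hedged, not generated code): for a Java method
+//   void m(int i, String s, double d)
+// the out (C) signature built by the loop below would be roughly
+//   T_INT, T_ADDRESS, T_LONG, T_VOID
+// i.e. the String is passed as a utf8 char* (T_ADDRESS) and the double is
+// passed as a 64-bit long occupying two slots (T_LONG plus a T_VOID filler).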
+
+static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
+static bool offsets_initialized = false;
+
+static VMRegPair reg64_to_VMRegPair(Register r) {
+  VMRegPair ret;
+  if (wordSize == 8) {
+    ret.set2(r->as_VMReg());
+  } else {
+    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
+  }
+  return ret;
+}
+
+
+nmethod *SharedRuntime::generate_dtrace_nmethod(
+    MacroAssembler *masm, methodHandle method) {
+
+
+  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
+  // be single threaded in this method.
+  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
+
+  // Fill in the signature array, for the calling-convention call.
+  int total_args_passed = method->size_of_parameters();
+
+  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
+  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
+
+  // The signature we are going to use for the trap that dtrace will see
+  // java/lang/String is converted. We drop "this" and any other object
+  // is converted to NULL.  (A one-slot java/lang/Long object reference
+  // is converted to a two-slot long, which is why we double the allocation).
+  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
+
+  int i=0;
+  int total_strings = 0;
+  int first_arg_to_pass = 0;
+  int total_c_args = 0;
+
+  // Skip the receiver as dtrace doesn't want to see it
+  if( !method->is_static() ) {
+    in_sig_bt[i++] = T_OBJECT;
+    first_arg_to_pass = 1;
+  }
+
+  SignatureStream ss(method->signature());
+  for ( ; !ss.at_return_type(); ss.next()) {
+    BasicType bt = ss.type();
+    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
+    out_sig_bt[total_c_args++] = bt;
+    if( bt == T_OBJECT) {
+      symbolOop s = ss.as_symbol_or_null();
+      if (s == vmSymbols::java_lang_String()) {
+        total_strings++;
+        out_sig_bt[total_c_args-1] = T_ADDRESS;
+      } else if (s == vmSymbols::java_lang_Boolean() ||
+                 s == vmSymbols::java_lang_Byte()) {
+        out_sig_bt[total_c_args-1] = T_BYTE;
+      } else if (s == vmSymbols::java_lang_Character() ||
+                 s == vmSymbols::java_lang_Short()) {
+        out_sig_bt[total_c_args-1] = T_SHORT;
+      } else if (s == vmSymbols::java_lang_Integer() ||
+                 s == vmSymbols::java_lang_Float()) {
+        out_sig_bt[total_c_args-1] = T_INT;
+      } else if (s == vmSymbols::java_lang_Long() ||
+                 s == vmSymbols::java_lang_Double()) {
+        out_sig_bt[total_c_args-1] = T_LONG;
+        out_sig_bt[total_c_args++] = T_VOID;
+      }
+    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
+      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
+      // We convert double to long
+      out_sig_bt[total_c_args-1] = T_LONG;
+      out_sig_bt[total_c_args++] = T_VOID;
+    } else if ( bt == T_FLOAT) {
+      // We convert float to int
+      out_sig_bt[total_c_args-1] = T_INT;
+    }
+  }
+
+  assert(i==total_args_passed, "validly parsed signature");
+
+  // Now get the compiled-Java layout as input arguments
+  int comp_args_on_stack;
+  comp_args_on_stack = SharedRuntime::java_calling_convention(
+      in_sig_bt, in_regs, total_args_passed, false);
+
+  // We have received a description of where all the java args are located
+  // on entry to the wrapper. We need to convert these args to where
+  // a native (non-jni) function would expect them. To figure out
+  // where they go we convert the java signature to a C signature and remove
+  // T_VOID for any long/double we might have received.
+
+
+  // Now figure out where the args must be stored and how much stack space
+  // they require (neglecting out_preserve_stack_slots but including space for
+  // storing the first six register arguments). It's weird; see int_stk_helper.
+  //
+  int out_arg_slots;
+  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
+
+  // Calculate the total number of stack slots we will need.
+
+  // First count the abi requirement plus all of the outgoing args
+  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
+
+  // Plus a temp for possible conversion of float/double/long register args
+
+  int conversion_temp = stack_slots;
+  stack_slots += 2;
+
+
+  // Now space for the string(s) we must convert
+
+  int string_locs = stack_slots;
+  stack_slots += total_strings *
+                   (max_dtrace_string_size / VMRegImpl::stack_slot_size);
+
+  // Ok The space we have allocated will look like:
+  //
+  //
+  // FP-> |                     |
+  //      |---------------------|
+  //      | string[n]           |
+  //      |---------------------| <- string_locs[n]
+  //      | string[n-1]         |
+  //      |---------------------| <- string_locs[n-1]
+  //      | ...                 |
+  //      | ...                 |
+  //      |---------------------| <- string_locs[1]
+  //      | string[0]           |
+  //      |---------------------| <- string_locs[0]
+  //      | temp                |
+  //      |---------------------| <- conversion_temp
+  //      | outbound memory     |
+  //      | based arguments     |
+  //      |                     |
+  //      |---------------------|
+  //      |                     |
+  // SP-> | out_preserved_slots |
+  //
+  //
+
+  // Now compute the actual number of stack words we need, rounding to keep
+  // the stack properly aligned.
+  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
+
+  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
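+  // Worked example (hedged, assuming 4-byte stack slots, a 256-byte
+  // max_dtrace_string_size and one string argument): each string reserves
+  // 256 / 4 = 64 slots, so with e.g. 6 outgoing arg slots and no preserved
+  // slots we get 6 + 2 (temp) + 64 = 72 slots, which already satisfies the
+  // 4-slot rounding above and yields a 72 * 4 = 288 byte frame.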
+
+  intptr_t start = (intptr_t)__ pc();
+
+  // First thing make an ic check to see if we should even be here
+
+  {
+    Label L;
+    const Register temp_reg = G3_scratch;
+    Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
+    __ verify_oop(O0);
+    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
+    __ cmp(temp_reg, G5_inline_cache_reg);
+    __ brx(Assembler::equal, true, Assembler::pt, L);
+    __ delayed()->nop();
+
+    __ jump_to(ic_miss, 0);
+    __ delayed()->nop();
+    __ align(CodeEntryAlignment);
+    __ bind(L);
+  }
+
+  int vep_offset = ((intptr_t)__ pc()) - start;
+
+
+  // The instruction at the verified entry point must be 5 bytes or longer
+  // because it can be patched on the fly by make_non_entrant. The stack bang
+  // instruction fits that requirement.
+
+  // Generate stack overflow check before creating frame
+  __ generate_stack_overflow_check(stack_size);
+
+  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
+         "valid size for make_non_entrant");
+
+  // Generate a new frame for the wrapper.
+  __ save(SP, -stack_size, SP);
+
+  // Frame is now completed as far a size and linkage.
+
+  int frame_complete = ((intptr_t)__ pc()) - start;
+
+#ifdef ASSERT
+  bool reg_destroyed[RegisterImpl::number_of_registers];
+  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+    reg_destroyed[r] = false;
+  }
+  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+    freg_destroyed[f] = false;
+  }
+
+#endif /* ASSERT */
+
+  VMRegPair zero;
+  const Register g0 = G0; // without this we get a compiler warning (why??)
+  zero.set2(g0->as_VMReg());
+
+  int c_arg, j_arg;
+
+  Register conversion_off = noreg;
+
+  for (j_arg = first_arg_to_pass, c_arg = 0 ;
+       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
+
+    VMRegPair src = in_regs[j_arg];
+    VMRegPair dst = out_regs[c_arg];
+
+#ifdef ASSERT
+    if (src.first()->is_Register()) {
+      assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
+    } else if (src.first()->is_FloatRegister()) {
+      assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
+                                               FloatRegisterImpl::S)], "ack!");
+    }
+    if (dst.first()->is_Register()) {
+      reg_destroyed[dst.first()->as_Register()->encoding()] = true;
+    } else if (dst.first()->is_FloatRegister()) {
+      freg_destroyed[dst.first()->as_FloatRegister()->encoding(
+                                                 FloatRegisterImpl::S)] = true;
+    }
+#endif /* ASSERT */
+
+    switch (in_sig_bt[j_arg]) {
+      case T_ARRAY:
+      case T_OBJECT:
+        {
+          if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
+              out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
+            // need to unbox a one-slot value
+            Register in_reg = L0;
+            Register tmp = L2;
+            if ( src.first()->is_reg() ) {
+              in_reg = src.first()->as_Register();
+            } else {
+              assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
+                     "must be");
+              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
+            }
+            // If the final destination is an acceptable register
+            if ( dst.first()->is_reg() ) {
+              if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
+                tmp = dst.first()->as_Register();
+              }
+            }
+
+            Label skipUnbox;
+            if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
+              __ mov(G0, tmp->successor());
+            }
+            __ br_null(in_reg, true, Assembler::pn, skipUnbox);
+            __ delayed()->mov(G0, tmp);
+
+            BasicType bt = out_sig_bt[c_arg];
+            int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
+            switch (bt) {
+                case T_BYTE:
+                  __ ldub(in_reg, box_offset, tmp); break;
+                case T_SHORT:
+                  __ lduh(in_reg, box_offset, tmp); break;
+                case T_INT:
+                  __ ld(in_reg, box_offset, tmp); break;
+                case T_LONG:
+                  __ ld_long(in_reg, box_offset, tmp); break;
+                default: ShouldNotReachHere();
+            }
+
+            __ bind(skipUnbox);
+            // If tmp wasn't final destination copy to final destination
+            if (tmp == L2) {
+              VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
+              if (out_sig_bt[c_arg] == T_LONG) {
+                long_move(masm, tmp_as_VM, dst);
+              } else {
+                move32_64(masm, tmp_as_VM, out_regs[c_arg]);
+              }
+            }
+            if (out_sig_bt[c_arg] == T_LONG) {
+              assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
+              ++c_arg; // move over the T_VOID to keep the loop indices in sync
+            }
+          } else if (out_sig_bt[c_arg] == T_ADDRESS) {
+            Register s =
+                src.first()->is_reg() ? src.first()->as_Register() : L2;
+            Register d =
+                dst.first()->is_reg() ? dst.first()->as_Register() : L2;
+
+            // We store the oop now so that the conversion pass can reach it
+            // while in the inner frame. This will be the only store if
+            // the oop is NULL.
+            if (s != L2) {
+              // src is register
+              if (d != L2) {
+                // dst is register
+                __ mov(s, d);
+              } else {
+                assert(Assembler::is_simm13(reg2offset(dst.first()) +
+                          STACK_BIAS), "must be");
+                __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
+              }
+            } else {
+                // src not a register
+                assert(Assembler::is_simm13(reg2offset(src.first()) +
+                           STACK_BIAS), "must be");
+                __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
+                if (d == L2) {
+                  assert(Assembler::is_simm13(reg2offset(dst.first()) +
+                             STACK_BIAS), "must be");
+                  __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
+                }
+            }
+          } else if (out_sig_bt[c_arg] != T_VOID) {
+            // Convert the arg to NULL
+            if (dst.first()->is_reg()) {
+              __ mov(G0, dst.first()->as_Register());
+            } else {
+              assert(Assembler::is_simm13(reg2offset(dst.first()) +
+                         STACK_BIAS), "must be");
+              __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
+            }
+          }
+        }
+        break;
+      case T_VOID:
+        break;
+
+      case T_FLOAT:
+        if (src.first()->is_stack()) {
+          // Stack to stack/reg is simple
+          move32_64(masm, src, dst);
+        } else {
+          if (dst.first()->is_reg()) {
+            // freg -> reg
+            int off =
+              STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
+            Register d = dst.first()->as_Register();
+            if (Assembler::is_simm13(off)) {
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, off);
+              __ ld(SP, off, d);
+            } else {
+              if (conversion_off == noreg) {
+                __ set(off, L6);
+                conversion_off = L6;
+              }
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, conversion_off);
+              __ ld(SP, conversion_off , d);
+            }
+          } else {
+            // freg -> mem
+            int off = STACK_BIAS + reg2offset(dst.first());
+            if (Assembler::is_simm13(off)) {
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, off);
+            } else {
+              if (conversion_off == noreg) {
+                __ set(off, L6);
+                conversion_off = L6;
+              }
+              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
+                     SP, conversion_off);
+            }
+          }
+        }
+        break;
+
+      case T_DOUBLE:
+        assert( j_arg + 1 < total_args_passed &&
+                in_sig_bt[j_arg + 1] == T_VOID &&
+                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
+        if (src.first()->is_stack()) {
+          // Stack to stack/reg is simple
+          long_move(masm, src, dst);
+        } else {
+          Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
+
+          // Destination could be an odd reg on 32bit in which case
+          // we can't load directly into the destination.
+
+          if (!d->is_even() && wordSize == 4) {
+            d = L2;
+          }
+          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
+          if (Assembler::is_simm13(off)) {
+            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
+                   SP, off);
+            __ ld_long(SP, off, d);
+          } else {
+            if (conversion_off == noreg) {
+              __ set(off, L6);
+              conversion_off = L6;
+            }
+            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
+                   SP, conversion_off);
+            __ ld_long(SP, conversion_off, d);
+          }
+          if (d == L2) {
+            long_move(masm, reg64_to_VMRegPair(L2), dst);
+          }
+        }
+        break;
+
+      case T_LONG :
+        // 32bit can't do a split move of something like g1 -> O0, O1
+        // so use a memory temp
+        if (src.is_single_phys_reg() && wordSize == 4) {
+          Register tmp = L2;
+          if (dst.first()->is_reg() &&
+              (wordSize == 8 || dst.first()->as_Register()->is_even())) {
+            tmp = dst.first()->as_Register();
+          }
+
+          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
+          if (Assembler::is_simm13(off)) {
+            __ stx(src.first()->as_Register(), SP, off);
+            __ ld_long(SP, off, tmp);
+          } else {
+            if (conversion_off == noreg) {
+              __ set(off, L6);
+              conversion_off = L6;
+            }
+            __ stx(src.first()->as_Register(), SP, conversion_off);
+            __ ld_long(SP, conversion_off, tmp);
+          }
+
+          if (tmp == L2) {
+            long_move(masm, reg64_to_VMRegPair(L2), dst);
+          }
+        } else {
+          long_move(masm, src, dst);
+        }
+        break;
+
+      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
+
+      default:
+        move32_64(masm, src, dst);
+    }
+  }
+
+
+  // If we have any strings we must store any register based arg to the stack
+  // This includes any still live xmm registers too.
+
+  if (total_strings > 0 ) {
+
+    // protect all the arg registers
+    __ save_frame(0);
+    __ mov(G2_thread, L7_thread_cache);
+    const Register L2_string_off = L2;
+
+    // Get first string offset
+    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
+
+    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
+      if (out_sig_bt[c_arg] == T_ADDRESS) {
+
+        VMRegPair dst = out_regs[c_arg];
+        const Register d = dst.first()->is_reg() ?
+            dst.first()->as_Register()->after_save() : noreg;
+
+        // It's a string; the oop was already copied to the out arg
+        // position
+        if (d != noreg) {
+          __ mov(d, O0);
+        } else {
+          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
+                 "must be");
+          __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
+        }
+        Label skip;
+
+        __ br_null(O0, false, Assembler::pn, skip);
+        __ delayed()->add(FP, L2_string_off, O1);
+
+        if (d != noreg) {
+          __ mov(O1, d);
+        } else {
+          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
+                 "must be");
+          __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
+        }
+
+        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
+                relocInfo::runtime_call_type);
+        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
+
+        __ bind(skip);
+
+      }
+
+    }
+    __ mov(L7_thread_cache, G2_thread);
+    __ restore();
+
+  }
+
+
+  // Ok now we are done. Need to place the nop that dtrace wants in order to
+  // patch in the trap
+
+  int patch_offset = ((intptr_t)__ pc()) - start;
+
+  __ nop();
+
+
+  // Return
+
+  __ ret();
+  __ delayed()->restore();
+
+  __ flush();
+
+  nmethod *nm = nmethod::new_dtrace_nmethod(
+      method, masm->code(), vep_offset, patch_offset, frame_complete,
+      stack_slots / VMRegImpl::slots_per_word);
+  return nm;
+
+}
+
+#endif // HAVE_DTRACE_H
+
+// This function returns the adjustment (in number of words) to a c2i adapter
+// activation for use during deoptimization
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+	return (callee_locals - callee_parameters) * Interpreter::stackElementWords();
+}
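+// Example (hedged): for an interpreted callee with 3 parameters and 7 locals,
+// the last frame grows by (7 - 3) * Interpreter::stackElementWords() words,
+// i.e. 4 words on a port where each stack element occupies a single word.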
+
+// "Top of Stack" slots that may be unused by the calling convention but must
+// otherwise be preserved.
+// On Intel these are not necessary and the value can be zero.
+// On Sparc this describes the words reserved for storing a register window
+// when an interrupt occurs.
+uint SharedRuntime::out_preserve_stack_slots() {
+  //return frame::register_save_words * VMRegImpl::slots_per_word;
+	 return 0;
+}
+/*
+static void gen_new_frame(MacroAssembler* masm, bool deopt) {
+//
+// Common out the new frame generation for deopt and uncommon trap
+//
+  Register        G3pcs              = G3_scratch; // Array of new pcs (input)
+  Register        Oreturn0           = O0;
+  Register        Oreturn1           = O1;
+  Register        O2UnrollBlock      = O2;
+  Register        O3array            = O3;         // Array of frame sizes (input)
+  Register        O4array_size       = O4;         // number of frames (input)
+  Register        O7frame_size       = O7;         // number of frames (input)
+
+  __ ld_ptr(O3array, 0, O7frame_size);
+  __ sub(G0, O7frame_size, O7frame_size);
+  __ save(SP, O7frame_size, SP);
+  __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
+
+  #ifdef ASSERT
+  // make sure that the frames are aligned properly
+#ifndef _LP64
+  __ btst(wordSize*2-1, SP);
+  __ breakpoint_trap(Assembler::notZero);
+#endif
+  #endif
+
+  // Deopt needs to pass some extra live values from frame to frame
+
+  if (deopt) {
+    __ mov(Oreturn0->after_save(), Oreturn0);
+    __ mov(Oreturn1->after_save(), Oreturn1);
+  }
+
+  __ mov(O4array_size->after_save(), O4array_size);
+  __ sub(O4array_size, 1, O4array_size);
+  __ mov(O3array->after_save(), O3array);
+  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
+  __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
+
+  #ifdef ASSERT
+  // trash registers to show a clear pattern in backtraces
+  __ set(0xDEAD0000, I0);
+  __ add(I0,  2, I1);
+  __ add(I0,  4, I2);
+  __ add(I0,  6, I3);
+  __ add(I0,  8, I4);
+  // Don't touch I5 could have valuable savedSP
+  __ set(0xDEADBEEF, L0);
+  __ mov(L0, L1);
+  __ mov(L0, L2);
+  __ mov(L0, L3);
+  __ mov(L0, L4);
+  __ mov(L0, L5);
+
+  // trash the return value as there is nothing to return yet
+  __ set(0xDEAD0001, O7);
+  #endif
+
+  __ mov(SP, O5_savedSP);
+}
+
+
+static void make_new_frames(MacroAssembler* masm, bool deopt) {
+  //
+  // loop through the UnrollBlock info and create new frames
+  //
+  Register        G3pcs              = G3_scratch;
+  Register        Oreturn0           = O0;
+  Register        Oreturn1           = O1;
+  Register        O2UnrollBlock      = O2;
+  Register        O3array            = O3;
+  Register        O4array_size       = O4;
+  Label           loop;
+
+  // Before we make new frames, check to see if stack is available.
+  // Do this after the caller's return address is on top of stack
+  if (UseStackBanging) {
+    // Get total frame size for interpreted frames
+    __ ld(Address(O2UnrollBlock, 0,
+         Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
+    __ bang_stack_size(O4, O3, G3_scratch);
+  }
+
+  __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
+  __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);
+
+  __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);
+
+  // Adjust old interpreter frame to make space for new frame's extra java locals
+  //
+  // We capture the original sp for the transition frame only because it is needed in
+  // order to properly calculate interpreter_sp_adjustment. Even though in real life
+  // every interpreter frame captures a savedSP it is only needed at the transition
+  // (fortunately). If we had to have it correct everywhere then we would need to
+  // be told the sp_adjustment for each frame we create. If the frame size array
+  // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
+  // for each frame we create and keep up the illusion every where.
+  //
+
+  __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
+  __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
+  __ sub(SP, O7, SP);
+
+#ifdef ASSERT
+  // make sure that there is at least one entry in the array
+  __ tst(O4array_size);
+  __ breakpoint_trap(Assembler::zero);
+#endif
+
+  // Now push the new interpreter frames
+  __ bind(loop);
+
+  // allocate a new frame, filling the registers
+
+  gen_new_frame(masm, deopt);        // allocate an interpreter frame
+
+  __ tst(O4array_size);
+  __ br(Assembler::notZero, false, Assembler::pn, loop);
+  __ delayed()->add(O3array, wordSize, O3array);
+  __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc
+
+}
+*/
+
+//------------------------------generate_deopt_blob----------------------------
+// Ought to generate an ideal graph & compile, but here's some SPARC ASM
+// instead.
+void SharedRuntime::generate_deopt_blob() {
+  // allocate space for the code
+  ResourceMark rm;
+  // setup code generation tools
+  //int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
+ // CodeBuffer*     buffer             = new CodeBuffer(2048+pad, 512, 0, 0, 0, false);
+  CodeBuffer     buffer ("deopt_blob", 2048, 2048);
+  MacroAssembler* masm  = new MacroAssembler( & buffer);
+//int frame_size, exception_offset, reexecute_offset;
+  int frame_size_in_words;
+  OopMap* map = NULL;
+ // Account for the extra args we place on the stack
+ // by the time we call fetch_unroll_info
+ const int additional_words = 2; // deopt kind, thread
+ OopMapSet *oop_maps = new OopMapSet();
+  address start = __ pc();
+  Label cont;
+  // we use S3 for the deopt reason register
+  Register reason = S3;
+  // use TREG for the thread register
+  Register thread = TREG;
+  // use S7 for the UnrollBlock returned by fetch_unroll_info
+  Register unroll = S7;
+  // Prolog for non exception case!
+   // Correct the return address we were given.
+  // FIXME: is the return address on the tos or in RA?
+  __ addi(RA, RA, - (NativeCall::return_address_offset));
+// Save everything in sight.
+  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
+  __ move(AT, Deoptimization::Unpack_deopt);
+  __ push(AT); 
+  __ b(cont);
+  __ delayed()->nop();
+
+  int reexecute_offset = __ pc() - start;
+   // Reexecute case
+   // return address is the pc describes what bci to do re-execute at
+   // No need to update map as each call to save_live_registers will produce identical oopmap
+  
+  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
+  // __ pushl(Deoptimization::Unpack_reexecute);
+  // __ jmp(cont);
+     __ move(AT, Deoptimization::Unpack_reexecute); 
+     __ push(AT); 
+     __ b(cont);
+     __ delayed()->nop();
+    
+  int exception_offset = __ pc() - start;
+  // Prolog for exception case
+
+  // all registers are dead at this entry point, except for V0 and
+  // V1 which contain the exception oop and exception pc
+  // respectively.  Set them in TLS and fall thru to the
+  // unpack_with_exception_in_tls entry point.
+  //__ get_thread(edi);
+  __ get_thread(thread);
+  //__ movl(Address(edi, JavaThread::exception_pc_offset()), edx);
+  __ sw(V1, thread, in_bytes(JavaThread::exception_pc_offset())); 
+  //  __ movl(Address(edi, JavaThread::exception_oop_offset()), eax);
+  __ sw(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
+  int exception_in_tls_offset = __ pc() - start;
+// new implementation because exception oop is now passed in JavaThread
+
+  // Prolog for exception case
+  // All registers must be preserved because they might be used by LinearScan
+  // Exception oop and throwing PC are passed in JavaThread
+  // tos: stack at point of call to method that threw the exception (i.e. only
+  // args are on the stack, no return address)
+
+  // make room on stack for the return address
+  // It will be patched later with the throwing pc. The correct value is not 
+  // available now because loading it from memory would destroy registers.
+   //__ pushl(0);
+  // Not needed here, because save_live_registers already saved the return address (RA).
+  // FIXME: does MIPS need to patch the return address on the stack?
+ // __ push(ZERO);
+  // Save everything in sight.
+  // No need to update map as each call to save_live_registers will produce identical oopmap
+  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
+
+  // Now it is safe to overwrite any register
+  // store the correct deoptimization type
+ // __ pushl(Deoptimization::Unpack_exception); 
+   __ move(AT, Deoptimization::Unpack_exception);
+   __ push(AT); 
+   // load throwing pc from JavaThread and patch it as the return address 
+  // of the current frame. Then clear the field in JavaThread
+ // __ get_thread(edi);
+  __ get_thread(thread);
+//  __ movl(edx, Address(edi, JavaThread::exception_pc_offset()));
+  __ lw(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
+  // should we save it on the stack or in a register?
+  // __ movl(Address(ebp, wordSize), edx); 
+   __ move(RA, V1); 
+ // __ movl(Address(edi, JavaThread::exception_pc_offset()), NULL_WORD);
+   __ sw(ZERO, thread, in_bytes(JavaThread::exception_pc_offset()));
+
+  
+#ifdef ASSERT
+  // verify that there is really an exception oop in JavaThread
+ // __ movl(eax, Address(edi, JavaThread::exception_oop_offset()));
+   __ lw(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
+   //__ verify_oop(eax);
+   __ verify_oop(AT);
+  // verify that there is no pending exception
+  Label no_pending_exception;
+//  __ movl(eax, Address(edi, Thread::pending_exception_offset()));
+  __ lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+  //__ testl(eax, eax);
+  //__ jcc(Assembler::zero, no_pending_exception);
+ __ beq(AT, ZERO, no_pending_exception); 
+ __ delayed()->nop(); 
+ __ stop("must not have pending exception here");
+ __ bind(no_pending_exception);
+#endif
+  __ bind(cont);
+  // Compiled code leaves the floating point stack dirty, empty it.
+  __ empty_FPU_stack();
+
+
+  // All callee-saved registers and return value registers are saved at this point.
+
+  // Call fetch_unroll_info().  Need thread and this frame, but NOT official VM
+  // entry crud - we cannot block on this call, no GC can happen.  The call
+  // should capture the return values.
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+
+   __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
+
+   __ sw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+  // push the one argument (relative to the oopmap)
+//	__ addiu(SP, SP, - 2*wordSize);
+   __ addiu(SP, SP, - wordSize);
+   __ move(AT, -8);
+   __ andr(SP, SP, AT);	
+   __ move(A0, thread);
+	
+   __ relocate(relocInfo::internal_pc_type); 
+   {	
+	int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+	__ lui(AT, Assembler::split_high(save_pc));
+	__ addiu(AT, AT, Assembler::split_low(save_pc));
+   }
+   __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+
+   __ lui(T9, Assembler::split_high((int)Deoptimization::fetch_unroll_info));
+   __ addiu(T9, T9, Assembler::split_low((int)Deoptimization::fetch_unroll_info));
+   __ jalr(T9);
+   __ delayed()->nop();
+   oop_maps->add_gc_map(__ pc() - start, map);
+// pop the arg so now frame offsets (slots) don't include any arg.
+   __ lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+   __ reset_last_Java_frame(thread, false, true);
+
+  // Load UnrollBlock into S7
+   __ move(unroll, V0);
+
+  // Store frame locals in registers or memory
+
+  // Move the unpack kind to a safe place in the UnrollBlock because
+  // we are very short of registers
+
+  Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
+  __ pop(reason);	
+  __ sw(reason, unpack_kind);
+  // save the unpack_kind value
+  // Retrieve the possible live values (return values)
+  // All callee save registers representing jvm state
+  // are now in the vframeArray.
+
+  Label noException;
+  __ move(AT, Deoptimization::Unpack_exception);
+  __ bne(AT, reason, noException);// Was exception pending?
+  __ delayed()->nop();
+  __ lw(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
+  __ lw(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
+  __ sw(ZERO, thread, in_bytes(JavaThread::exception_pc_offset()));
+  __ sw(ZERO, thread, in_bytes(JavaThread::exception_oop_offset()));
+ 
+  __ verify_oop(V0);
+
+  // Overwrite the result registers with the exception results.
+  //__ movl(Address(esp, RegisterSaver::eaxOffset()*wordSize), eax);
+  __ sw(V0, SP, RegisterSaver::v0Offset()*wordSize); 
+//	__ movl(Address(esp, RegisterSaver::edxOffset()*wordSize), edx);
+  __ sw(V1, SP, RegisterSaver::v1Offset()*wordSize);
+  
+  __ bind(noException);
+
+//	__ lw(V0, SP, V0_off * wordSize);
+//	__ lw(V1, SP, V1_off * wordSize);
+	//__ lwc1(F0, SP, F0_off * wordSize);
+	//__ lwc1(F1, SP, (F0_off + 1) * wordSize);
+
+  // Stack is back to only having register save data on the stack.
+  // Now restore the result registers. Everything else is either dead or captured
+  // in the vframeArray.
+
+  RegisterSaver::restore_result_registers(masm);
+  // All of the register save area has been popped off the stack. Only the
+  // return address remains.
+  // Pop all the frames we must move/replace. 
+ // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame  (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted). 
+  // 
+  // Note: by leaving the return address of self-frame on the stack
+  // and using the size of frame 2 to adjust the stack
+  // when we are done the return to frame 3 will still be on the stack.
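+  //
+  // A hedged sketch of the stack after the frame-pushing loop below, for a
+  // deopt that expands into two interpreter frames:
+  //
+  //      | caller of deopting frame (3)  |  <- its return address stays put
+  //      |-------------------------------|
+  //      | skeletal interpreter frame    |
+  //      |-------------------------------|
+  //      | skeletal interpreter frame    |
+  //      |-------------------------------|
+  // SP-> | re-pushed self-frame          |
+  //
+  // (illustrative only; the frame contents are filled in later by
+  // Deoptimization::unpack_frames)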
+  
+  // register for the sender's sp
+  Register sender_sp = T5;
+  // register for frame pcs
+  Register pcs = T4;
+  // register for frame sizes
+  Register sizes = T6;
+  // register for frame count
+  Register count = T3;
+	
+  // Pop deoptimized frame
+  //__ addl(esp,Address(edi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
+  __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
+  __ add(SP, SP, AT);
+  // sp should be pointing at the return address to the caller (3)
+  // Load array of frame pcs into ECX
+  //__ movl(ecx,Address(edi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+  // Load array of frame pcs into T4
+  __ lw(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
+  //__ popl(esi); // trash the old pc
+  // FIXME: does MIPS need to trash the old pc?
+  __ addi(SP, SP, wordSize);
+  // Load array of frame sizes into ESI
+//  __ movl(esi,Address(edi,Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+ // Load array of frame sizes into T6
+  __ lw(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
+
+ 
+ 
+ Address counter(unroll, Deoptimization::UnrollBlock::counter_temp_offset_in_bytes());
+  //__ movl(ebx, Address(edi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+  //__ movl(counter, ebx);
+  // Load count of frames into T3
+  __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
+  __ sw(count, counter);
+  // Pick up the initial fp we should save
+ // __ movl(ebp, Address(edi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
+  __ lw(FP, unroll,  Deoptimization::UnrollBlock::initial_fp_offset_in_bytes());
+   // Now adjust the caller's stack to make up for the extra locals
+  // but record the original sp so that we can save it in the skeletal interpreter
+  // frame and the stack walking of interpreter_sender will get the unextended sp
+  // value and not the "real" sp value.
+  __ move(sender_sp, SP); 
+// Address sp_temp(unroll, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
+//__ movl(sp_temp, esp);
+//__ sw(SP, sp_temp);
+  __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
+  __ sub(SP, SP, AT);
+			
+  // Push interpreter frames in a loop
+  Label loop;
+   __ bind(loop);
+   __ lw(T2, sizes, 0);		// Load frame size
+   __ lw(AT, pcs, 0);  	       // save return address
+   __ sw(AT, SP, (-1) * wordSize);	
+   __ sw(FP, SP, (-2) * wordSize);
+   __ addi(FP, SP, (-2) * wordSize);	// save old & set new FP
+   __ sub(SP, SP, T2); 			// Prolog!
+  // This value is corrected by layout_activation_impl
+  //__ movl(Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD );
+   __ sw(ZERO, FP, frame::interpreter_frame_last_sp_offset * wordSize); 
+   __ sw(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable
+   __ move(sender_sp, SP);	// pass to next frame
+   __ addi(count, count, -1); 	// decrement counter
+   __ addi(sizes, sizes, 4); 	// Bump array pointer (sizes)
+   __ bne(count, ZERO, loop);
+   __ delayed()->addi(pcs, pcs, 4); 	// Bump array pointer (pcs)
+   __ lw(AT, pcs, 0);
+   __ sw(AT, SP, (-1) * wordSize); 	// save final return address
+  // Re-push self-frame
+   __ sw(FP, SP, (-2) * wordSize);
+   __ addi(FP, SP, - 2 * wordSize);	// save old & set new FP
+ //  __ addi(SP, SP, -(reg_save_frame_size) * wordSize);
+   __ addi(SP, SP, -(frame_size_in_words - additional_words) * wordSize);
+	
+   // Restore frame locals after moving the frame
+//	__ sw(V0, SP, V0_off * wordSize);
+  __ sw(V0, SP, RegisterSaver::v0Offset() * wordSize);
+//	__ sw(V1, SP, V1_off * wordSize);
+  __ sw(V1, SP, RegisterSaver::v1Offset() * wordSize);
+  __ swc1(F0, SP, RegisterSaver::fpResultOffset() * wordSize);   // store the float/double result
+  __ swc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
+
+  
+  // Call unpack_frames().  Need thread and this frame, but NOT official VM entry - cannot block on
+  // this call, no GC can happen.
+  __ set_last_Java_frame(thread, NOREG, FP, NULL);
+  __ sw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+  __ move(A1, reason);	// exec_mode
+  __ move(A0, thread);	// thread
+  __ addi(SP, SP, (-2) *wordSize);
+  __ move(AT, -8);
+  __ andr(SP, SP, AT);	
+  __ relocate(relocInfo::internal_pc_type); 
+  {	
+    int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+    __ lui(AT, Assembler::split_high(save_pc));
+    __ addiu(AT, AT, Assembler::split_low(save_pc));
+  }
+  __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+	
+	//__ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
+  __ lui(T9, Assembler::split_high((int)Deoptimization::unpack_frames));
+  __ addiu(T9, T9, Assembler::split_low((int)Deoptimization::unpack_frames));
+  __ jalr(T9);			
+  __ delayed()->nop();
+  // Set an oopmap for the call site
+ // oop_maps->add_gc_map(__ offset(), true, new OopMap(reg_save_frame_size + 2, 0));
+  oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0));
+
+//	__ addi(SP, SP, 2 * wordSize);
+ //FIXME here, do we need it? 
+//  __ push(V0);	
+	
+  __ lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+ // __ reset_last_Java_frame(thread, false, true);
+  __ reset_last_Java_frame(thread, false, false);
+  
+  // Collect return values
+//	__ lw(V0, SP, V0_off * wordSize);
+  __ lw(V0, SP, (RegisterSaver::v0Offset() + additional_words +1) * wordSize);
+//	__ lw(V1, SP, V1_off * wordSize);
+  __ lw(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize);
+//FIXME, 
+  // Clear floating point stack before returning to interpreter
+   __ empty_FPU_stack();
+// FIXME: we should handle float and double return values here
+  // Push a float or double return value if necessary.
+/*
+   Label no_double_value, yes_double_value;
+  __ lw(T1, SP, ret_type*wordSize);
+  __ move(AT, T_DOUBLE); 
+  __ beq(AT, T1, yes_double_value);
+  __ delayed();
+
+  __ move(AT, T_FLOAT);
+  __ bne(AT, T1, no_double_value);
+  __ delayed()->nop();
+
+  __ bind(yes_double_value);
+  __ lwc1(F0, SP, F0_off * wordSize);
+  __ lwc1(F1, SP, (F0_off + 1) * wordSize);
+  __ bind(no_double_value);
+#endif
+*/ 
+  __ leave();
+
+  // Jump to interpreter
+  __ jr(RA);
+  __ delayed()->nop();
+
+ // frame_size            = reg_save_frame_size + 2;
+  masm->flush();
+  //_deopt_blob = DeoptimizationBlob::create(buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size);
+  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
+  //deopt_with_exception_entry_for_patch = _deopt_blob->unpack() + patching_exception_offset;
+  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+ //jerome_for_debug 
+//  _deopt_blob->print();
+}
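+// A hedged summary of the deopt blob entry points wired up above (offsets are
+// relative to `start`; accessor names assumed from the shared DeoptimizationBlob
+// interface):
+//   unpack                       : offset 0  (plain deopt, Unpack_deopt)
+//   unpack_with_exception        : exception_offset
+//   unpack_with_reexecution      : reexecute_offset
+//   unpack_with_exception_in_tls : exception_in_tls_offset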
+
+#ifdef COMPILER2
+
+//------------------------------generate_uncommon_trap_blob--------------------
+// Ought to generate an ideal graph & compile, but here's some SPARC ASM
+// instead.
+void SharedRuntime::generate_uncommon_trap_blob() {
+  // allocate space for the code
+  ResourceMark rm;
+  // setup code generation tools
+  CodeBuffer*   buffer = new CodeBuffer(512, 512, 0, 0, 0, false);
+  MacroAssembler* masm = new MacroAssembler(buffer);
+
+  enum frame_layout {
+    arg0_off,      // thread                     sp + 0 // Arg location for 
+    arg1_off,      // unloaded_class_index       sp + 1 // calling C
+    s0_off,
+    s1_off,
+    s2_off,
+    s3_off,
+    s4_off,
+    s5_off,
+    s6_off,
+    s7_off,
+    return_off,    // slot for return address    sp + 9
+    framesize
+  };
+  
+  address start = __ pc();
+  // Push self-frame.
+  ///__ subl(esp,return_off*wordSize);     // Epilog!
+  __ lw(AT, ZERO, 24);
+  __ nop();
+  __ stop("enter in handle uncommon blob\n");
+
+  __ addiu(SP, SP, -return_off*wordSize);
+
+  // Save the callee-saved registers (S0 - S7).
+  __ sw(S0, SP, s0_off);
+  __ sw(S1, SP, s1_off);
+  __ sw(S2, SP, s2_off);
+  __ sw(S3, SP, s3_off);
+  __ sw(S4, SP, s4_off);
+  __ sw(S5, SP, s5_off);
+  __ sw(S6, SP, s6_off);
+  __ sw(S7, SP, s7_off);
+
+  // Clear the floating point exception stack
+  __ empty_FPU_stack();
+
+  Register thread = TREG;
+
+  // set last_Java_sp
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+  ///__ set_last_Java_frame(edx, noreg, noreg, NULL);
+  __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
+  __ relocate(relocInfo::internal_pc_type); 
+  {	
+    int save_pc = (int)__ pc() +  20 + NativeCall::return_address_offset;
+    __ lui(AT, Assembler::split_high(save_pc));
+    __ addiu(AT, AT, Assembler::split_low(save_pc));
+  }
+  __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // capture callee-saved registers as well as return values.
+  __ move(A0, thread);
+  // argument already in T1 
+  __ move(A1, T1);
+  __ lui(T9, Assembler::split_high((int)Deoptimization::uncommon_trap));
+  __ addiu(T9, T9, Assembler::split_low((int)Deoptimization::uncommon_trap));
+  __ jalr(T9);
+  __ delayed()->nop();
+
+  // Set an oopmap for the call site
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* map =  new OopMap( framesize, 0 );
+
+  map->set_callee_saved( SharedInfo::stack2reg(s0_off   ), framesize,0, OptoReg::Name(S0_num  ) );
+  map->set_callee_saved( SharedInfo::stack2reg(s1_off   ), framesize,0, OptoReg::Name(S1_num  ) );
+  map->set_callee_saved( SharedInfo::stack2reg(s2_off   ), framesize,0, OptoReg::Name(S2_num  ) );
+  map->set_callee_saved( SharedInfo::stack2reg(s3_off   ), framesize,0, OptoReg::Name(S3_num  ) );
+  map->set_callee_saved( SharedInfo::stack2reg(s4_off   ), framesize,0, OptoReg::Name(S4_num  ) );
+  map->set_callee_saved( SharedInfo::stack2reg(s5_off   ), framesize,0, OptoReg::Name(S5_num  ) );
+  map->set_callee_saved( SharedInfo::stack2reg(s6_off   ), framesize,0, OptoReg::Name(S6_num  ) );
+  map->set_callee_saved( SharedInfo::stack2reg(s7_off   ), framesize,0, OptoReg::Name(S7_num  ) );
+  oop_maps->add_gc_map( __ offset(), true, map);
+
+  __ get_thread(thread);
+  __ reset_last_Java_frame(thread, false,true);
+
+  // Load UnrollBlock into S7
+  Register unroll = S7;
+  __ move(unroll, V0);
+
+  // Pop all the frames we must move/replace. 
+  // 
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame  (no frame link)
+  // 3: possible-i2c-adapter-frame 
+  // 4: caller of deopting frame (could be compiled/interpreted; if interpreted
+  //    we will create a c2i adapter here)
+
+  // Pop self-frame.  We have no frame, and must rely only on V0 and SP.
+  __ addiu(SP, SP, (framesize-1)*wordSize);
+
+  // Pop deoptimized frame
+  __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
+  __ add(SP, SP, AT);
+
+  // register for frame pcs
+  Register pcs = T4;
+  // register for frame sizes
+  Register sizes = T6;
+  // register for frame count
+  Register count = T3;
+  // register for the sender's sp
+  Register sender_sp = T1;
+
+  // sp should be pointing at the return address to the caller (4)
+  // Load array of frame pcs into T4
+  __ lw(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
+  __ addiu(SP, SP, 4);
+
+  // Load array of frame sizes into T6
+  __ lw(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
+  __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
+
+  // Pick up the initial fp we should save
+  __ lw(FP, unroll, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes());
+  // Now adjust the caller's stack to make up for the extra locals
+  // but record the original sp so that we can save it in the skeletal interpreter
+  // frame and the stack walking of interpreter_sender will get the unextended sp
+  // value and not the "real" sp value.
+
+  __ move(sender_sp, SP);
+  __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
+  __ sub(SP, SP, AT);
+  // Push interpreter frames in a loop
+  Label loop;
+  __ bind(loop);
+  __ lw(AT, pcs, 0);
+  __ sw(AT, SP, -4);
+  __ sw(FP, SP, -8);
+  __ addiu(FP, SP, -8);
+  __ lw(T2, sizes, 0);
+  __ sub(SP, SP, T2);
+  __ sw(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
+  __ move(sender_sp, SP);
+  __ addiu(count, count, -1);
+  __ addiu(sizes, sizes, 4);
+  __ bne(count, ZERO, loop);
+  __ delayed()->addiu(pcs, pcs, 4);
+
+  __ lw(AT, pcs, 0);
+  __ sw(AT, SP, -4);
+  __ sw(FP, SP, -8);
+  __ addiu(FP, SP, -8);
+  __ addiu(SP, SP, -(framesize-2)* wordSize);
+
+  // set last_Java_sp, last_Java_fp
+  __ get_thread(thread);
+  __ set_last_Java_frame(thread, NOREG, FP, NULL);
+  __ relocate(relocInfo::internal_pc_type); 
+  {	
+    int save_pc = (int)__ pc() +  24 + NativeCall::return_address_offset;
+    __ lui(AT, Assembler::split_high(save_pc));
+    __ addiu(AT, AT, Assembler::split_low(save_pc));
+  }
+  __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // restore return values to their stack-slots with the new SP.
+  __ move(A0, thread);
+  __ move(A1, Deoptimization::Unpack_uncommon_trap);
+  __ addiu(SP, SP, -8);
+  __ lui(T9, Assembler::split_high((int)Deoptimization::unpack_frames));
+  __ addiu(T9, T9, Assembler::split_low((int)Deoptimization::unpack_frames));
+  __ jalr(T9);
+  __ delayed()->nop();
+  // Set an oopmap for the call site
+  oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) );
+
+  __ get_thread(thread);
+  __ reset_last_Java_frame(thread, true,false);
+
+  // Pop self-frame.
+  __ leave();     // Epilog!
+
+  // Jump to interpreter
+  __ jr(RA);
+  __ delayed()->nop();
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  _uncommon_trap_blob = UncommonTrapBlob::create(buffer, oop_maps, framesize);
+}
+
+#endif // COMPILER2
+
+//------------------------------generate_handler_blob-------------------
+//
+// Generate a special Compile2Runtime blob that saves all registers, and sets
+// up an OopMap.
+//
+// This blob is jumped to (via a breakpoint and the signal handler) from a
+// safepoint in compiled code.  On entry to this blob, O7 contains the
+// address in the original nmethod at which we should resume normal execution.
+// Thus, this blob looks like a subroutine which must preserve lots of
+// registers and return normally.  Note that O7 is never register-allocated,
+// so it is guaranteed to be free here.
+//
+
+// The hardest part of what this blob must do is to save the 64-bit %o
+// registers in the 32-bit build.  A simple 'save' turn the %o's to %i's and
+// an interrupt will chop off their heads.  Making space in the caller's frame
+// first will let us save the 64-bit %o's before save'ing, but we cannot hand
+// the adjusted FP off to the GC stack-crawler: this will modify the caller's
+// SP and mess up HIS OopMaps.  So we first adjust the caller's SP, then save
+// the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
+// Tricky, tricky, tricky...
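+//
+// Note: the paragraph above is inherited from the SPARC version; on this MIPS
+// port the blob below follows the simpler shape sketched here (a hedged
+// summary of the code that follows, not additional behaviour):
+//   1. save all live registers        (RegisterSaver::save_live_registers)
+//   2. call the VM routine `call_ptr` with the thread in A0
+//   3. if an exception is pending, restore registers and jump to
+//      StubRoutines::forward_exception_entry()
+//   4. otherwise restore registers and either return normally or continue
+//      at the call destination handed back in V0 (the deopt-at-call case)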
+
+static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
+
+  // Account for thread arg in our frame
+  const int additional_words = 0; 
+  int frame_size_in_words;
+
+  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");  
+
+  ResourceMark rm;
+  OopMapSet *oop_maps = new OopMapSet();
+  OopMap* map;
+
+  // allocate space for the code
+  // setup code generation tools  
+  CodeBuffer  buffer ("handler_blob", 1024, 512);
+  MacroAssembler* masm = new MacroAssembler( &buffer);
+  
+  ///const Register java_thread = edi; // callee-saved for VC++
+  const Register thread = TREG; // callee-saved
+  address start   = __ pc();  
+  address call_pc = NULL;  
+
+  // If cause_return is true we are at a poll_return and the return address
+  // to the caller of the safepointing nmethod is on the stack. We can leave
+  // this return address on the stack and effectively complete the return and
+  // safepoint in the caller.
+  // Otherwise we push space for a return address that the safepoint
+  // handler will install later to make the stack walking sensible.
+  // I don't think we need this on Godson (this MIPS port).
+  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
+  
+  // The following is basically a call_VM. However, we need the precise
+  // address of the call in order to generate an oopmap. Hence, we do all the
+  // work ourselves.
+
+  // Push thread argument and setup last_Java_sp
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+  __ move(A0, thread);
+  __ set_last_Java_frame(thread, NOREG, NOREG, NULL);
+  __ relocate(relocInfo::internal_pc_type); 
+  {	
+    int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+    __ lui(AT, Assembler::split_high(save_pc));
+    __ addiu(AT, AT, Assembler::split_low(save_pc));
+  }
+  __ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+
+
+  // do the call
+  __ lui(T9, Assembler::split_high((int)call_ptr));
+  __ addiu(T9, T9, Assembler::split_low((int)call_ptr));
+  __ jalr(T9);
+  __ delayed()->nop();
+
+  // Set an oopmap for the call site.  This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved.  This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
+
+  oop_maps->add_gc_map(__ offset(),  map);
+
+
+  Label noException;
+
+  // Clear last_Java_sp again
+  __ reset_last_Java_frame(thread, false, false);
+
+  __ lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+  __ beq(AT, ZERO, noException);
+  __ delayed()->nop();
+
+  // Exception pending
+
+  RegisterSaver::restore_live_registers(masm);
+  // forward_exception_entry needs the return address on the stack
+  __ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry()));
+  __ addiu(T9, T9, Assembler::split_low((int)StubRoutines::forward_exception_entry()));
+  __ jr(T9);
+  __ delayed()->nop();
+
+  // No exception case
+  Label continueL;
+
+  __ bind(noException);
+  __ slt(AT, V0, ZERO);
+  __ bne(AT, ZERO, continueL);
+  __ delayed()->nop();
+
+  // Normal exit, register restoring and exit  
+  RegisterSaver::restore_live_registers(masm);
+
+  __ jr(RA);
+  __ delayed()->nop();
+  
+  // We have deoptimized at a blocked call; we may not re-execute the
+  // instruction, since the interpreter would skip the call. Therefore we
+  // execute the destination of the call instead; the destination is valid
+  // because the receiver was already consumed.
+  // V0 holds the destination of the call.
+  __ bind(continueL);
+  __ move(RA, V0);
+  RegisterSaver::restore_live_registers(masm);
+
+  // Everything is just like we were at entry (except RA, which now holds the call destination)
+  // original return address is still there too (we deopt on return)
+  // just continue with the call.
+  __ jr(RA);
+  __ delayed()->nop();
+
+ 
+  // make sure all code is generated
+  masm->flush();  
+
+  // Fill-out other meta info
+  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);      
+}
+
+//
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
+//
+// Generate a stub that calls into vm to find out the proper destination
+// of a java call. All the argument registers are live at this point
+// but since this is generic code we don't know what they are and the caller
+// must do any gc of the args.
+//
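+// Rough flow of the generated stub (a sketch of the code below, not additional emitted code):
+//   save_live_registers();                                        // args stay live; recorded in the OopMap
+//   A0 = thread; set_last_Java_frame(); call destination(thread); // e.g. resolve_static_call_C
+//   SP = thread->last_Java_sp; reset_last_Java_frame();
+//   if (thread->pending_exception != NULL) {
+//     restore_live_registers(); push(RA);
+//     thread->vm_result = NULL; V0 = thread->pending_exception;
+//     goto StubRoutines::forward_exception_entry();
+//   }
+//   T7 = thread->vm_result;                                       // the returned methodOop
+//   restore_live_registers();  jump to the entry point returned in V0;
+//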
+static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
+	assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+	// allocate space for the code
+	ResourceMark rm;
+
+	//CodeBuffer buffer(name, 1000, 512);
+	CodeBuffer buffer(name, 2000, 1024);
+	MacroAssembler* masm  = new MacroAssembler(&buffer);
+
+	int frame_size_words;
+	//we put the thread in A0 
+	enum frame_layout { 
+		extra_words = 0 };
+
+	OopMapSet *oop_maps = new OopMapSet();
+	OopMap* map = NULL;
+
+	int start = __ offset();
+	map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words);
+	
+ 	
+	int frame_complete = __ offset();
+
+	const Register thread = T8;
+	__ get_thread(thread);
+
+	__ move(A0, thread); 
+	__ set_last_Java_frame(thread, noreg, FP, NULL);
+	__ addi(SP,SP, -wordSize);
+	//align the stack before invoke native 
+	__ move(AT, -8);
+	__ andr(SP, SP, AT); 
+	__ relocate(relocInfo::internal_pc_type); 
+	{	
+		int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+		__ lui(AT, Assembler::split_high(save_pc));
+		__ addiu(AT, AT, Assembler::split_low(save_pc));
+	}
+	__ sw(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
+
+	__ lui(T9, Assembler::split_high((int)destination));
+	__ addiu(T9, T9, Assembler::split_low((int)destination));
+   	__ jalr(T9);
+   	__ delayed()->nop();
+
+	// Set an oopmap for the call site.
+	// We need this not only for callee-saved registers, but also for volatile
+	// registers that the compiler might be keeping live across a safepoint.
+	//printf("resolve blob start = 0x%x, offset=0x%x \n", (int)start, (int)(__ offset())); 
+	oop_maps->add_gc_map( __ offset() - start, map);
+	// V0 contains the address we are going to jump to, assuming no exception got installed
+	__ get_thread(thread);
+	__ lw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+	// clear last_Java_sp
+	__ reset_last_Java_frame(thread, true, true);
+	// check for pending exceptions
+	Label pending;
+	__ lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+	__ bne(AT,ZERO, pending);
+	__ delayed()->nop(); 
+	// get the returned methodOop
+	// FIXME: does MIPS need this?
+	__ lw(T7, thread, in_bytes(JavaThread::vm_result_offset()));
+	__ sw(T7, SP, RegisterSaver::t7_Offset() * wordSize);
+	__ sw(V0, SP, RegisterSaver::v0_Offset() * wordSize);
+	RegisterSaver::restore_live_registers(masm);
+
+	// We are back to the original state on entry and ready to go.
+	//  __ jmp(eax);
+	__ jr(V0);
+	__ delayed()->nop();
+	// Pending exception after the safepoint
+
+	__ bind(pending);
+
+	RegisterSaver::restore_live_registers(masm);
+
+	// exception pending => remove activation and forward to exception handler
+	// forward_exception_entry needs the return address on the stack
+	__ push(RA);
+	__ get_thread(thread);
+	//  __ movl(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
+	__ sw(ZERO, thread, in_bytes(JavaThread::vm_result_offset())); 
+	// __ movl(eax, Address(thread, Thread::pending_exception_offset()));
+	__ lw(V0, thread, in_bytes(Thread::pending_exception_offset()));
+	__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
+	__ delayed() -> nop();
+	// -------------
+	// make sure all code is generated
+	masm->flush();  
+
+	RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
+	return tmp;
+}
+
+void SharedRuntime::generate_stubs() {
+	_wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
+				SharedRuntime::handle_wrong_method),"wrong_method_stub");
+	_ic_miss_blob      = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
+				SharedRuntime::handle_wrong_method_ic_miss),"ic_miss_stub");
+	_resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
+				SharedRuntime::resolve_opt_virtual_call_C),"resolve_opt_virtual_call");
+	_resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
+				SharedRuntime::resolve_virtual_call_C),"resolve_virtual_call");
+	_resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, 
+				SharedRuntime::resolve_static_call_C),"resolve_static_call");
+	_polling_page_safepoint_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address, 
+				SafepointSynchronize::handle_polling_page_exception), false);
+	_polling_page_return_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
+				SafepointSynchronize::handle_polling_page_exception), true);
+	generate_deopt_blob();
+#ifdef COMPILER2
+	_uncommon_trap_blob = generate_uncommon_trap_blob();
+#endif // COMPILER2
+}
+
+extern "C" int SpinPause() {return 0;}
+extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
+extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/stubGenerator_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,3479 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_stubGenerator_mips.cpp.incl"
+
+// Declaration and definition of StubGenerator (no .hpp file).
+// For a more detailed description of the stub routine structure
+// see the comment in stubRoutines.hpp
+
+#define __ _masm->
+//#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
+//#define a__ ((Assembler*)_masm)->
+
+//#ifdef PRODUCT
+//#define BLOCK_COMMENT(str) /* nothing */
+//#else
+//#define BLOCK_COMMENT(str) __ block_comment(str)
+//#endif
+
+//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+const int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
+
+// Stub Code definitions
+
+static address handle_unsafe_access() {
+  JavaThread* thread = JavaThread::current();
+  address pc = thread->saved_exception_pc();
+  // pc is the instruction which we must emulate
+  // doing a no-op is fine:  return garbage from the load
+  // therefore, compute npc
+  //address npc = Assembler::locate_next_instruction(pc);
+  address npc = (address)((unsigned long)pc + sizeof(unsigned long));
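+  // Every MIPS instruction is 4 bytes wide, so the next instruction is simply
+  // at pc + 4 (sizeof(unsigned long) on this 32-bit port).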
+
+  // request an async exception
+  thread->set_pending_unsafe_access_error();
+
+  // return address of next instruction to execute
+  return npc;
+}
+
+class StubGenerator: public StubCodeGenerator {
+ private:
+#if 0
+  void inc_counter_np_(int& counter) {
+    __ incrementl(ExternalAddress((address)&counter));
+  }
+
+  // Call stubs are used to call Java from C
+  //
+  // Linux Arguments:
+  //    c_rarg0:   call wrapper address                   address
+  //    c_rarg1:   result                                 address
+  //    c_rarg2:   result type                            BasicType
+  //    c_rarg3:   method                                 methodOop
+  //    c_rarg4:   (interpreter) entry point              address
+  //    c_rarg5:   parameters                             intptr_t*
+  //    16(rbp): parameter size (in words)              int
+  //    24(rbp): thread                                 Thread*
+  //
+  //     [ return_from_Java     ] <--- rsp
+  //     [ argument word n      ]
+  //      ...
+  // -12 [ argument word 1      ]
+  // -11 [ saved r15            ] <--- rsp_after_call
+  // -10 [ saved r14            ]
+  //  -9 [ saved r13            ]
+  //  -8 [ saved r12            ]
+  //  -7 [ saved rbx            ]
+  //  -6 [ call wrapper         ]
+  //  -5 [ result               ]
+  //  -4 [ result type          ]
+  //  -3 [ method               ]
+  //  -2 [ entry point          ]
+  //  -1 [ parameters           ]
+  //   0 [ saved rbp            ] <--- rbp
+  //   1 [ return address       ]
+  //   2 [ parameter size       ]
+  //   3 [ thread               ]
+  //
+  // Windows Arguments:
+  //    c_rarg0:   call wrapper address                   address
+  //    c_rarg1:   result                                 address
+  //    c_rarg2:   result type                            BasicType
+  //    c_rarg3:   method                                 methodOop
+  //    48(rbp): (interpreter) entry point              address
+  //    56(rbp): parameters                             intptr_t*
+  //    64(rbp): parameter size (in words)              int
+  //    72(rbp): thread                                 Thread*
+  //
+  //     [ return_from_Java     ] <--- rsp
+  //     [ argument word n      ]
+  //      ...
+  //  -8 [ argument word 1      ]
+  //  -7 [ saved r15            ] <--- rsp_after_call
+  //  -6 [ saved r14            ]
+  //  -5 [ saved r13            ]
+  //  -4 [ saved r12            ]
+  //  -3 [ saved rdi            ]
+  //  -2 [ saved rsi            ]
+  //  -1 [ saved rbx            ]
+  //   0 [ saved rbp            ] <--- rbp
+  //   1 [ return address       ]
+  //   2 [ call wrapper         ]
+  //   3 [ result               ]
+  //   4 [ result type          ]
+  //   5 [ method               ]
+  //   6 [ entry point          ]
+  //   7 [ parameters           ]
+  //   8 [ parameter size       ]
+  //   9 [ thread               ]
+  //
+  //    Windows reserves the callers stack space for arguments 1-4.
+  //    We spill c_rarg0-c_rarg3 to this space.
+
+  // Call stub stack layout word offsets from rbp
+  enum call_stub_layout {
+#ifdef _WIN64
+    rsp_after_call_off = -7,
+    r15_off            = rsp_after_call_off,
+    r14_off            = -6,
+    r13_off            = -5,
+    r12_off            = -4,
+    rdi_off            = -3,
+    rsi_off            = -2,
+    rbx_off            = -1,
+    rbp_off            =  0,
+    retaddr_off        =  1,
+    call_wrapper_off   =  2,
+    result_off         =  3,
+    result_type_off    =  4,
+    method_off         =  5,
+    entry_point_off    =  6,
+    parameters_off     =  7,
+    parameter_size_off =  8,
+    thread_off         =  9
+#else
+    rsp_after_call_off = -12,
+    mxcsr_off          = rsp_after_call_off,
+    r15_off            = -11,
+    r14_off            = -10,
+    r13_off            = -9,
+    r12_off            = -8,
+    rbx_off            = -7,
+    call_wrapper_off   = -6,
+    result_off         = -5,
+    result_type_off    = -4,
+    method_off         = -3,
+    entry_point_off    = -2,
+    parameters_off     = -1,
+    rbp_off            =  0,
+    retaddr_off        =  1,
+    parameter_size_off =  2,
+    thread_off         =  3
+#endif
+  };
+#endif
+
+  // ABI mips o32
+  // This figure is not the MIPS o32 ABI; it is the frame layout used when calling Java from C.
+  // Call stubs are used to call Java from C
+  //
+  //    [ return_from_Java     ]
+  //    [ argument word n-1    ] <--- sp
+  //      ...
+  //    [ argument word 0      ]
+  //      ...
+  //-10 [ S6     	       ]
+  // -9 [ S5		       ] 
+  // -8 [ S4		       ]
+  // -7 [ S3                   ]
+  // -6 [ S0  		       ]
+  // -5 [ TSR(S2)	       ]
+  // -4 [ LVP(S7)              ]
+  // -3 [ BCP(S1)              ]
+  // -2 [ saved fp             ] <--- fp_after_call
+  // -1 [ return address       ] 
+  //  0 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp
+  //  1 [ result               ] <--- a1
+  //  2 [ result_type          ] <--- a2
+  //  3 [ method               ] <--- a3
+  //  4 [ entry_point          ]
+  //  5 [ parameters           ]
+  //  6 [ parameter_size       ]
+  //  7 [ thread               ]
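+  //
+  // In outline, the stub below does the following (a sketch, not additional emitted code):
+  //   save RA, FP and the callee-saved S-registers; spill A0..A3 into the frame;
+  //   for (i = parameter_size; i > 0; i--)            // copy Java args, receiver last
+  //     *dest++ = parameters[i - 1];
+  //   T7 = method; T9 = entry_point; jalr T9;         // enter Java code
+  //   on return, store V0/V1 (or F0/F1 for float/double) into 'result' according
+  //   to 'result_type', then restore the saved registers and return to the C caller.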
+
+  address generate_call_stub(address& return_address) {
+    //assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 &&
+    //       (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
+    //       "adjust this code");
+    StubCodeMark mark(this, "StubRoutines", "call_stub");
+    address start = __ pc();
+
+    // same as in generate_catch_exception()!
+
+	// stub code
+	/* We do not call enter() here because there is not yet a consistent view
+	   about whether it should save the RA and FP registers.
+	 */
+	// save ra and fp
+	__ sw(RA, SP, (-1) * wordSize);
+	__ sw(FP, SP, (-2) * wordSize);
+
+	__ sw(BCP, SP, (-3) * wordSize);
+	__ sw(LVP, SP, (-4) * wordSize);
+	__ sw(TSR, SP, (-5) * wordSize);
+
+	__ sw(S1, SP, (-6) * wordSize);
+	__ sw(S3, SP, (-7) * wordSize);
+	__ sw(S4, SP, (-8) * wordSize);
+	__ sw(S5, SP, (-9) * wordSize);
+	__ sw(S6, SP, (-10) * wordSize);
+
+#ifdef OPT_THREAD
+	__ get_thread(TREG);
+#endif
+		
+
+	// lw parameter_size
+	__ lw(T0, SP, 6 * wordSize);
+	// 14 words should be the maximum gap between the arguments and the callee-saved registers
+	//__ move(FP, SP);
+	__ addi(FP, SP, (-2) * wordSize);
+	__ addi(SP, SP, (-10) * wordSize);
+
+	// save parameter
+//	__ sw(A0, FP, 0 * wordSize);
+//	__ sw(A1, FP, 1 * wordSize);
+//	__ sw(A2, FP, 2 * wordSize);
+//	__ sw(A3, FP, 3 * wordSize);
+	__ sw(A0, FP, 2 * wordSize);
+	__ sw(A1, FP, 3 * wordSize);
+	__ sw(A2, FP, 4 * wordSize);
+	__ sw(A3, FP, 5 * wordSize);
+
+
+#ifdef ASSERT
+	// make sure we have no pending exceptions
+	{ Label L;
+		// load thread
+//		__ lw(T2, FP, 7 * wordSize);
+		__ lw(T2, FP, 9 * wordSize);
+		__ lw(T3, T2, in_bytes(Thread::pending_exception_offset()));
+		__ beq(T3, ZERO, L); 
+		__ delayed()->nop();
+		/* FIXME: how to implement stop() on MIPS is not yet decided; revisit this later */
+		__ stop("StubRoutines::call_stub: entered with pending exception");
+		__ bind(L);
+	}
+#endif
+
+	// pass parameters if any
+	Label parameters_done;
+	// check whether parameter_size is zero
+	__ beq(T0, ZERO, parameters_done);
+	__ delayed()->nop();
+	__ sll(AT,T0,Interpreter::logStackElementSize());
+	__ sub(SP, SP, AT); 
+	__ move(AT, -StackAlignmentInBytes); 
+	__ andr(SP, SP , AT); 
+	// Copy Java parameters in reverse order (receiver last).
+	// Note that the argument order is inverted in the process:
+	// source is T2[T0: N-1..0]
+	// dest   is SP[T4: 0..N-1]
+	Label loop;
+//	__ lw(T2, FP, 5 * wordSize);   // parameter pointer in T2,refernce to the stack arch
+	__ lw(T2, FP, 7 * wordSize);   // parameter pointer in T2 (see the frame layout above)
+	__ move(T4, ZERO);
+	__ bind(loop);
+	if (TaggedStackInterpreter) {
+	__ sll(T5, T0, 3);   
+	__ add(T5, T5, T2);	    
+	__ lw(AT, T5,  -2*wordSize);	
+	__ sll(T5,T4,3); 
+	__ add(T5,T5, SP); 
+	__ sw(AT, T5, Interpreter::expr_tag_offset_in_bytes(0)); 
+	}
+
+	// get parameter
+	__ sll(T5, T0, 2);   
+	__ add(T5, T5, T2);	    
+	__ lw(AT, T5,  -wordSize);	
+	__ sll(T5,T4,2); 
+	__ add(T5,T5, SP); 
+	__ sw(AT, T5, Interpreter::expr_offset_in_bytes(0)); 
+	__ addi(T4,T4,1); 
+	__ addi(T0,T0,-1); 
+	__ bne(T0, ZERO, loop);
+	__ delayed()->nop();
+	// advance to next parameter
+
+	// call Java function
+	__ bind(parameters_done);
+
+	// receiver in V0, methodOop in T7
+
+	//    __ lw(T7, FP, 3 * wordSize);        // get methodOop
+	__ move(T7, A3);
+//	__ lw(T9, FP, 4 * wordSize);       	// get entry_point
+	__ lw(T9, FP, 6 * wordSize);       	// get entry_point
+	__ move(T5,SP);             //set sender sp 
+	__ jalr(T9);
+	__ delayed()->nop();
+	return_address = __ pc();
+
+	Label common_return;
+	__ bind(common_return);
+
+	// store result depending on type
+	// (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
+//	__ lw(T0, FP, 1 * wordSize); 	// result --> T0
+	__ lw(T0, FP, 3 * wordSize); 	// result --> T0
+	Label is_long, is_float, is_double, exit;
+//	__ lw(T2, FP, 2 * wordSize);	// result_type --> T2
+	__ lw(T2, FP, 4 * wordSize);	// result_type --> T2
+	__ addi(T3, T2, (-1) * T_LONG);
+	__ beq(T3, ZERO, is_long);
+	__ delayed()->addi(T3, T2, (-1) * T_FLOAT);
+	__ beq(T3, ZERO, is_float);
+	__ delayed()->addi(T3, T2, (-1) * T_DOUBLE);
+	__ beq(T3, ZERO, is_double);
+	__ delayed()->nop();
+
+	// handle T_INT case
+	__ sw(V0, T0, 0 * wordSize);
+	__ bind(exit);
+
+	// restore 
+	__ addi(SP, FP, 2 * wordSize );
+	__ lw(RA, SP, -1 * wordSize);
+	__ lw(FP, SP, -2 * wordSize);
+	__ lw(BCP, SP, -3 * wordSize);
+	__ lw(LVP, SP, -4 * wordSize);
+	__ lw(TSR, SP, -5 * wordSize);
+
+	__ lw(S1, SP, (-6) * wordSize);
+	__ lw(S3, SP, (-7) * wordSize);
+	__ lw(S4, SP, (-8) * wordSize);
+	__ lw(S5, SP, (-9) * wordSize);
+	__ lw(S6, SP, (-10) * wordSize);
+	// return
+	__ jr(RA);
+	__ delayed()->nop();
+
+	// handle return types different from T_INT
+	__ bind(is_long);
+	__ sw(V0, T0, 0 * wordSize);
+	__ sw(V1, T0, 1 * wordSize);
+	__ b(exit);
+	__ delayed()->nop();
+
+	__ bind(is_float);
+	__ swc1(F0, T0, 0 * wordSize);
+	__ b(exit);
+	__ delayed()->nop();
+
+	__ bind(is_double);
+	__ swc1(F0, T0, 0 * wordSize);
+	__ swc1(F1, T0, 1 * wordSize);
+	__ b(exit);
+	__ delayed()->nop();
+	// FIXME: the 1.6 x86 version adds an FPU fixup operation here
+	StubRoutines::gs2::set_call_stub_compiled_return(__ pc());
+	__ b(common_return);
+	__ delayed()->nop(); 
+	return start;
+  }
+
+  // Return point for a Java call if there's an exception thrown in
+  // Java code.  The exception is caught and transformed into a
+  // pending exception stored in JavaThread that can be tested from
+  // within the VM.
+  //
+  // Note: Usually the parameters are removed by the callee. In case
+  // of an exception crossing an activation frame boundary, that is
+  // not the case if the callee is compiled code => need to set up
+  // SP.
+  //
+  // V0: exception oop
+
+  address generate_catch_exception() {
+	  StubCodeMark mark(this, "StubRoutines", "catch_exception");
+	  address start = __ pc();
+
+	  Register thread = TREG;
+
+	  // get thread directly
+#ifndef OPT_THREAD
+	  //__ lw(thread, FP, 7 * wordSize);
+	  __ lw(thread, FP, 9 * wordSize);
+#endif
+
+#ifdef ASSERT
+	  // verify that threads correspond
+	  { Label L;
+		  __ get_thread(T7);
+		  __ beq(T7, thread, L);
+		  __ delayed()->nop();
+		  __ stop("StubRoutines::catch_exception: threads must correspond");
+		  __ bind(L);
+	  }
+#endif
+	  // set pending exception
+	  __ verify_oop(V0);
+	  __ sw(V0, thread, in_bytes(Thread::pending_exception_offset()));
+	//  __ move(AT, (int)&jerome1); 
+	 // __ sw(V0, AT, 0); 
+	  __ move(AT, (int)__FILE__);
+	  __ sw(AT, thread, in_bytes(Thread::exception_file_offset   ()));
+	  __ move(AT, (int)__LINE__);
+	  __ sw(AT, thread, in_bytes(Thread::exception_line_offset   ()));
+
+	  // complete return to VM
+	  assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
+	  __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none);
+	  __ delayed()->nop();
+
+	  return start;
+  }
+
+  // Continuation point for runtime calls returning with a pending
+  // exception.  The pending exception check happened in the runtime
+  // or native call stub.  The pending exception in Thread is
+  // converted into a Java-level exception.
+  //
+  // Contract with Java-level exception handlers:
+  // V0: exception
+  // V1: throwing pc
+  //
+  // NOTE: At entry of this stub, the exception pc must be on the stack !!
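+  //
+  // Rough flow of the stub below (sketch):
+  //   A0 = *(address*)SP;                                   // the throwing pc
+  //   T9 = SharedRuntime::exception_handler_for_return_address(A0);
+  //   V1 = pop();                                           // throwing pc; return address removed
+  //   V0 = thread->pending_exception; thread->pending_exception = NULL;
+  //   jump to the handler in T9  (V0 = exception, V1 = throwing pc);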
+
+  address generate_forward_exception() {
+		StubCodeMark mark(this, "StubRoutines", "forward exception");
+		//Register thread = TREG;
+		Register thread = T8;
+		address start = __ pc();
+
+		// Upon entry, the sp points to the return address returning into Java
+		// (interpreted or compiled) code; i.e., the return address becomes the
+		// throwing pc.
+		//
+		// Arguments pushed before the runtime call are still on the stack but
+		// the exception handler will reset the stack pointer -> ignore them.
+		// A potential result in registers can be ignored as well.
+
+#ifdef ASSERT
+		// make sure this code is only executed if there is a pending exception
+#ifndef OPT_THREAD
+		__ get_thread(thread);
+#endif
+		{ Label L;
+			__ lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+			__ bne(AT, ZERO, L);
+			__ delayed()->nop();
+			__ stop("StubRoutines::forward exception: no pending exception (1)");
+			__ bind(L);
+		}
+#endif
+
+		// compute exception handler into T9
+		__ lw(A0, SP, 0);
+		__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), A0);
+		__ move(T9, V0);
+		__ pop(V1);
+
+#ifndef OPT_THREAD
+		__ get_thread(thread);
+#endif
+		__ lw(V0, thread, in_bytes(Thread::pending_exception_offset()));
+		__ sw(ZERO, thread, in_bytes(Thread::pending_exception_offset()));
+
+#ifdef ASSERT
+		// make sure exception is set
+		{ Label L;
+			__ bne(V0, ZERO, L);
+			__ delayed()->nop();
+			__ stop("StubRoutines::forward exception: no pending exception (2)");
+			__ bind(L);
+		}
+#endif
+
+		// continue at exception handler (return address removed)
+		// V0: exception
+		// T9: exception handler
+		// V1: throwing pc
+		__ verify_oop(V0);
+/*	__ move(AT, (int)&jerome1 );
+	__ sw(SP, AT, 0); 	
+	__ move(AT, (int)&jerome2 );
+	__ sw(FP, AT, 0); 	
+	__ move(AT, (int)&jerome3 );
+	__ sw(RA, AT, 0); 	
+	__ move(AT, (int)&jerome4 );
+	__ sw(T9, AT, 0); 	
+	__ move(AT, (int)&jerome5 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome6 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome7 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome10 );
+	__ sw(ZERO, AT, 0); 	
+	
+	__ pushad();
+	
+	//__ enter();
+	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	
+	//__ leave();
+	__ popad();
+
+
+  */	
+		__ jr(T9);
+		__ delayed()->nop();
+
+		return start;
+  }
+
+  // Support for jint atomic::xchg(jint exchange_value, volatile jint* dest)
+  //
+  // Arguments :
+  //    c_rarg0: exchange_value
+  //    c_rarg0: dest
+  //
+  // Result:
+  //    *dest <- ex, return (orig *dest)
+#if 0
+  address generate_atomic_xchg() {
+    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
+    address start = __ pc();
+
+    __ movl(rax, c_rarg0); // Copy to eax we need a return value anyhow
+    __ xchgl(rax, Address(c_rarg1, 0)); // automatic LOCK
+    __ ret(0);
+
+    return start;
+  }
+
+  // Support for intptr_t atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
+  //
+  // Arguments :
+  //    c_rarg0: exchange_value
+  //    c_rarg1: dest
+  //
+  // Result:
+  //    *dest <- ex, return (orig *dest)
+  address generate_atomic_xchg_ptr() {
+    StubCodeMark mark(this, "StubRoutines", "atomic_xchg_ptr");
+    address start = __ pc();
+
+    __ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
+    __ xchgptr(rax, Address(c_rarg1, 0)); // automatic LOCK
+    __ ret(0);
+
+    return start;
+  }
+
+  // Support for jint atomic::atomic_cmpxchg(jint exchange_value, volatile jint* dest,
+  //                                         jint compare_value)
+  //
+  // Arguments :
+  //    c_rarg0: exchange_value
+  //    c_rarg1: dest
+  //    c_rarg2: compare_value
+  //
+  // Result:
+  //    if ( compare_value == *dest ) {
+  //       *dest = exchange_value
+  //       return compare_value;
+  //    else
+  //       return *dest;
+  address generate_atomic_cmpxchg() {
+    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
+    address start = __ pc();
+
+    __ movl(rax, c_rarg2);
+   if ( os::is_MP() ) __ lock();
+    __ cmpxchgl(c_rarg0, Address(c_rarg1, 0));
+    __ ret(0);
+
+    return start;
+  }
+
+  // Support for jint atomic::atomic_cmpxchg_long(jlong exchange_value,
+  //                                             volatile jlong* dest,
+  //                                             jlong compare_value)
+  // Arguments :
+  //    c_rarg0: exchange_value
+  //    c_rarg1: dest
+  //    c_rarg2: compare_value
+  //
+  // Result:
+  //    if ( compare_value == *dest ) {
+  //       *dest = exchange_value
+  //       return compare_value;
+  //    else
+  //       return *dest;
+  address generate_atomic_cmpxchg_long() {
+    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
+    address start = __ pc();
+
+    __ movq(rax, c_rarg2);
+   if ( os::is_MP() ) __ lock();
+    __ cmpxchgq(c_rarg0, Address(c_rarg1, 0));
+    __ ret(0);
+
+    return start;
+  }
+
+  // Support for jint atomic::add(jint add_value, volatile jint* dest)
+  //
+  // Arguments :
+  //    c_rarg0: add_value
+  //    c_rarg1: dest
+  //
+  // Result:
+  //    *dest += add_value
+  //    return *dest;
+  address generate_atomic_add() {
+    StubCodeMark mark(this, "StubRoutines", "atomic_add");
+    address start = __ pc();
+
+    __ movl(rax, c_rarg0);
+   if ( os::is_MP() ) __ lock();
+    __ xaddl(Address(c_rarg1, 0), c_rarg0);
+    __ addl(rax, c_rarg0);
+    __ ret(0);
+
+    return start;
+  }
+
+  // Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
+  //
+  // Arguments :
+  //    c_rarg0: add_value
+  //    c_rarg1: dest
+  //
+  // Result:
+  //    *dest += add_value
+  //    return *dest;
+  address generate_atomic_add_ptr() {
+    StubCodeMark mark(this, "StubRoutines", "atomic_add_ptr");
+    address start = __ pc();
+
+    __ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
+   if ( os::is_MP() ) __ lock();
+    __ xaddptr(Address(c_rarg1, 0), c_rarg0);
+    __ addptr(rax, c_rarg0);
+    __ ret(0);
+
+    return start;
+  }
+
+  // Support for intptr_t OrderAccess::fence()
+  //
+  // Arguments :
+  //
+  // Result:
+  address generate_orderaccess_fence() {
+    StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
+    address start = __ pc();
+    __ mfence();
+    __ ret(0);
+
+    return start;
+  }
+#endif
+  // Support for intptr_t get_previous_fp()
+  //
+  // This routine is used to find the previous frame pointer for the
+  // caller (current_frame_guess). This is used as part of debugging
+  // when ps() is seemingly lost trying to find frames.
+  // This code assumes that the caller (current_frame_guess) has a frame.
+  address generate_get_previous_fp() {
+    StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
+    const Address old_fp       (FP,  0);
+    const Address older_fp       (V0,  0);
+    address start = __ pc();
+    __ enter();    
+    __ lw(V0, old_fp); // callers fp
+    __ lw(V0, older_fp); // the frame for ps()
+    __ leave();
+    __ jr(RA);
+    __ delayed()->nop();
+    return start;
+  }
+#if 0
+  //----------------------------------------------------------------------------------------------------
+  // Support for void verify_mxcsr()
+  //
+  // This routine is used with -Xcheck:jni to verify that native
+  // JNI code does not return to Java code without restoring the
+  // MXCSR register to our expected state.
+
+  address generate_verify_mxcsr() {
+    StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
+    address start = __ pc();
+
+    const Address mxcsr_save(rsp, 0);
+
+    if (CheckJNICalls) {
+      Label ok_ret;
+      __ push(rax);
+      __ subptr(rsp, wordSize);      // allocate a temp location
+      __ stmxcsr(mxcsr_save);
+      __ movl(rax, mxcsr_save);
+      __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
+      __ cmpl(rax, *(int *)(StubRoutines::x86::mxcsr_std()));
+      __ jcc(Assembler::equal, ok_ret);
+
+      __ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");
+
+      __ ldmxcsr(ExternalAddress(StubRoutines::x86::mxcsr_std()));
+
+      __ bind(ok_ret);
+      __ addptr(rsp, wordSize);
+      __ pop(rax);
+    }
+
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_f2i_fixup() {
+    StubCodeMark mark(this, "StubRoutines", "f2i_fixup");
+    Address inout(rsp, 5 * wordSize); // return address + 4 saves
+
+    address start = __ pc();
+
+    Label L;
+
+    __ push(rax);
+    __ push(c_rarg3);
+    __ push(c_rarg2);
+    __ push(c_rarg1);
+
+    __ movl(rax, 0x7f800000);
+    __ xorl(c_rarg3, c_rarg3);
+    __ movl(c_rarg2, inout);
+    __ movl(c_rarg1, c_rarg2);
+    __ andl(c_rarg1, 0x7fffffff);
+    __ cmpl(rax, c_rarg1); // NaN? -> 0
+    __ jcc(Assembler::negative, L);
+    __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint
+    __ movl(c_rarg3, 0x80000000);
+    __ movl(rax, 0x7fffffff);
+    __ cmovl(Assembler::positive, c_rarg3, rax);
+
+    __ bind(L);
+    __ movptr(inout, c_rarg3);
+
+    __ pop(c_rarg1);
+    __ pop(c_rarg2);
+    __ pop(c_rarg3);
+    __ pop(rax);
+
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_f2l_fixup() {
+    StubCodeMark mark(this, "StubRoutines", "f2l_fixup");
+    Address inout(rsp, 5 * wordSize); // return address + 4 saves
+    address start = __ pc();
+
+    Label L;
+
+    __ push(rax);
+    __ push(c_rarg3);
+    __ push(c_rarg2);
+    __ push(c_rarg1);
+
+    __ movl(rax, 0x7f800000);
+    __ xorl(c_rarg3, c_rarg3);
+    __ movl(c_rarg2, inout);
+    __ movl(c_rarg1, c_rarg2);
+    __ andl(c_rarg1, 0x7fffffff);
+    __ cmpl(rax, c_rarg1); // NaN? -> 0
+    __ jcc(Assembler::negative, L);
+    __ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
+    __ mov64(c_rarg3, 0x8000000000000000);
+    __ mov64(rax, 0x7fffffffffffffff);
+    __ cmov(Assembler::positive, c_rarg3, rax);
+
+    __ bind(L);
+    __ movptr(inout, c_rarg3);
+
+    __ pop(c_rarg1);
+    __ pop(c_rarg2);
+    __ pop(c_rarg3);
+    __ pop(rax);
+
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_d2i_fixup() {
+    StubCodeMark mark(this, "StubRoutines", "d2i_fixup");
+    Address inout(rsp, 6 * wordSize); // return address + 5 saves
+
+    address start = __ pc();
+
+    Label L;
+
+    __ push(rax);
+    __ push(c_rarg3);
+    __ push(c_rarg2);
+    __ push(c_rarg1);
+    __ push(c_rarg0);
+
+    __ movl(rax, 0x7ff00000);
+    __ movq(c_rarg2, inout);
+    __ movl(c_rarg3, c_rarg2);
+    __ mov(c_rarg1, c_rarg2);
+    __ mov(c_rarg0, c_rarg2);
+    __ negl(c_rarg3);
+    __ shrptr(c_rarg1, 0x20);
+    __ orl(c_rarg3, c_rarg2);
+    __ andl(c_rarg1, 0x7fffffff);
+    __ xorl(c_rarg2, c_rarg2);
+    __ shrl(c_rarg3, 0x1f);
+    __ orl(c_rarg1, c_rarg3);
+    __ cmpl(rax, c_rarg1);
+    __ jcc(Assembler::negative, L); // NaN -> 0
+    __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
+    __ movl(c_rarg2, 0x80000000);
+    __ movl(rax, 0x7fffffff);
+    __ cmov(Assembler::positive, c_rarg2, rax);
+
+    __ bind(L);
+    __ movptr(inout, c_rarg2);
+
+    __ pop(c_rarg0);
+    __ pop(c_rarg1);
+    __ pop(c_rarg2);
+    __ pop(c_rarg3);
+    __ pop(rax);
+
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_d2l_fixup() {
+    StubCodeMark mark(this, "StubRoutines", "d2l_fixup");
+    Address inout(rsp, 6 * wordSize); // return address + 5 saves
+
+    address start = __ pc();
+
+    Label L;
+
+    __ push(rax);
+    __ push(c_rarg3);
+    __ push(c_rarg2);
+    __ push(c_rarg1);
+    __ push(c_rarg0);
+
+    __ movl(rax, 0x7ff00000);
+    __ movq(c_rarg2, inout);
+    __ movl(c_rarg3, c_rarg2);
+    __ mov(c_rarg1, c_rarg2);
+    __ mov(c_rarg0, c_rarg2);
+    __ negl(c_rarg3);
+    __ shrptr(c_rarg1, 0x20);
+    __ orl(c_rarg3, c_rarg2);
+    __ andl(c_rarg1, 0x7fffffff);
+    __ xorl(c_rarg2, c_rarg2);
+    __ shrl(c_rarg3, 0x1f);
+    __ orl(c_rarg1, c_rarg3);
+    __ cmpl(rax, c_rarg1);
+    __ jcc(Assembler::negative, L); // NaN -> 0
+    __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong
+    __ mov64(c_rarg2, 0x8000000000000000);
+    __ mov64(rax, 0x7fffffffffffffff);
+    __ cmovq(Assembler::positive, c_rarg2, rax);
+
+    __ bind(L);
+    __ movq(inout, c_rarg2);
+
+    __ pop(c_rarg0);
+    __ pop(c_rarg1);
+    __ pop(c_rarg2);
+    __ pop(c_rarg3);
+    __ pop(rax);
+
+    __ ret(0);
+
+    return start;
+  }
+
+  address generate_fp_mask(const char *stub_name, int64_t mask) {
+    StubCodeMark mark(this, "StubRoutines", stub_name);
+
+    __ align(16);
+    address start = __ pc();
+
+    __ emit_data64( mask, relocInfo::none );
+    __ emit_data64( mask, relocInfo::none );
+
+    return start;
+  }
+#endif
+  // The following routine generates a subroutine to throw an
+  // asynchronous UnknownError when an unsafe access gets a fault that
+  // could not be reasonably prevented by the programmer.  (Example:
+  // SIGBUS/OBJERR.)
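+  // The signal handler redirects execution to this stub: it saves all registers,
+  // calls handle_unsafe_access() above (which records a pending unsafe-access
+  // error on the thread and returns the pc of the next instruction to execute),
+  // then restores the registers and returns.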
+  address generate_handler_for_unsafe_access() {
+		StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
+		address start = __ pc();
+		//  __ pushl(0);                      // hole for return address-to-be
+		__ pushad();                      // push registers
+		//  Address next_pc(esp, RegisterImpl::number_of_registers * BytesPerWord);
+		__ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type);
+		__ delayed()->nop(); 
+		//__ movl(next_pc, eax);            // stuff next address 
+		__ sw(V0, SP, RegisterImpl::number_of_registers * BytesPerWord); 
+		__ popad();
+		// __ ret(0);                        // jump to next address
+		__ jr(RA);
+		__ delayed()->nop();  
+		return start;
+  }
+
+  // Non-destructive plausibility checks for oops
+  //
+  // Arguments:
+  //    all args on stack!
+  //
+  // Stack after saving c_rarg3:
+  //    [tos + 0]: saved c_rarg3
+  //    [tos + 1]: saved c_rarg2
+  //    [tos + 2]: saved r12 (several TemplateTable methods use it)
+  //    [tos + 3]: saved flags
+  //    [tos + 4]: return address
+  //  * [tos + 5]: error message (char*)
+  //  * [tos + 6]: object to verify (oop)
+  //  * [tos + 7]: saved rax - saved by caller and bashed
+  //  * = popped on exit
+  address generate_verify_oop() {
+	  StubCodeMark mark(this, "StubRoutines", "verify_oop");
+	  address start = __ pc();
+
+	  __ verify_oop_subroutine(); 
+
+	  return start;
+  }
+/*
+  static address disjoint_byte_copy_entry;
+  static address disjoint_short_copy_entry;
+  static address disjoint_int_copy_entry;
+  static address disjoint_long_copy_entry;
+  static address disjoint_oop_copy_entry;
+
+  static address byte_copy_entry;
+  static address short_copy_entry;
+  static address int_copy_entry;
+  static address long_copy_entry;
+  static address oop_copy_entry;
+
+  static address checkcast_copy_entry;
+
+  //
+  // Verify that a register contains clean 32-bits positive value
+  // (high 32-bits are 0) so it could be used in 64-bits shifts.
+  //
+  //  Input:
+  //    Rint  -  32-bits value
+  //    Rtmp  -  scratch
+  //
+  void assert_clean_int(Register Rint, Register Rtmp) {
+#ifdef ASSERT
+    Label L;
+    assert_different_registers(Rtmp, Rint);
+    __ movslq(Rtmp, Rint);
+    __ cmpq(Rtmp, Rint);
+    __ jcc(Assembler::equal, L);
+    __ stop("high 32-bits of int value are not 0");
+    __ bind(L);
+#endif
+  }
+*/
+  //  Generate overlap test for array copy stubs
+  //
+  //  Input:
+  //     c_rarg0 - from
+  //     c_rarg1 - to
+  //     c_rarg2 - element count
+  //
+  //  Output:
+  //     rax   - &from[element count - 1]
+  //
+/*
+  void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
+    assert(no_overlap_target != NULL, "must be generated");
+    array_overlap_test(no_overlap_target, NULL, sf);
+  }
+  void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
+    array_overlap_test(NULL, &L_no_overlap, sf);
+  }
+  void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) {
+    const Register from     = c_rarg0;
+    const Register to       = c_rarg1;
+    const Register count    = c_rarg2;
+    const Register end_from = rax;
+
+    __ cmpptr(to, from);
+    __ lea(end_from, Address(from, count, sf, 0));
+    if (NOLp == NULL) {
+      ExternalAddress no_overlap(no_overlap_target);
+      __ jump_cc(Assembler::belowEqual, no_overlap);
+      __ cmpptr(to, end_from);
+      __ jump_cc(Assembler::aboveEqual, no_overlap);
+    } else {
+      __ jcc(Assembler::belowEqual, (*NOLp));
+      __ cmpptr(to, end_from);
+      __ jcc(Assembler::aboveEqual, (*NOLp));
+    }
+  }
+*/
+ //
+  //  Generate overlap test for array copy stubs
+  //
+  //  Input:
+  //     4(esp)    -  array1
+  //     8(esp)    -  array2
+  //    12(esp)    -  element count
+  //
+  //  Note: this code can only use %eax, %ecx, and %edx
+  //
+  
+  //
+  //  Generate overlap test for array copy stubs
+  //
+  //  Input:
+  //     A0    -  array1
+  //     A1    -  array2
+  //     A2    -  element count
+  //
+  //  Note: this code may only clobber AT, T4 and T5
+  //
+
+  // uses T4 and T5 as temporaries
+  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
+	  int elem_size = 1 << log2_elem_size;
+	  Address::ScaleFactor sf = Address::times_1;
+
+	  switch (log2_elem_size) {
+		  case 0: sf = Address::times_1; break;
+		  case 1: sf = Address::times_2; break;
+		  case 2: sf = Address::times_4; break;
+		  case 3: sf = Address::times_8; break;
+	  }
+
+	  //  __ movl(eax, Address(esp, 4));  // from
+	  //  __ movl(edx, Address(esp, 8));  // to
+	  //  __ movl(ecx, Address(esp, 12));  // count
+	  //   __ cmpl(edx, eax);
+
+	  //  __ leal(eax, Address(eax, ecx, sf, -elem_size));  // from + (count - 1) * elem_size
+	  __ sll(T5, A2, sf); 
+	  __ add(T5, T5, A0); 
+	  __ lea(T4, Address(T5, -elem_size)); 
+	  // __ jcc(Assembler::belowEqual, no_overlap_target);
+	  __ sub(AT, A1,A0); 
+	  __ blez(AT, no_overlap_target); 
+	  __ delayed()->nop(); 
+	  // __ cmpl(edx, eax);
+	  __ sub(AT, A1, T4); 
+	  // __ jcc(Assembler::above, no_overlap_target);
+	  __ bgtz(AT, no_overlap_target); 
+	  __ delayed()->nop(); 
+
+  }
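+
+  // Equivalent logic of array_overlap_test above (sketch):
+  //   if (to - from <= 0)                       goto no_overlap_target;  // forward copy is safe
+  //   if (to > from + (count - 1) * elem_size)  goto no_overlap_target;  // ranges are disjoint
+  //   /* otherwise fall through: the caller must copy backwards */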
+	/*
+  // Shuffle first three arg regs on Windows into Linux/Solaris locations.
+  //
+  // Outputs:
+  //    rdi - rcx
+  //    rsi - rdx
+  //    rdx - r8
+  //    rcx - r9
+  //
+  // Registers r9 and r10 are used to save rdi and rsi on Windows, which latter
+  // are non-volatile.  r9 and r10 should not be used by the caller.
+  //
+  void setup_arg_regs(int nargs = 3) {
+    const Register saved_rdi = r9;
+    const Register saved_rsi = r10;
+    assert(nargs == 3 || nargs == 4, "else fix");
+#ifdef _WIN64
+    assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
+           "unexpected argument registers");
+    if (nargs >= 4)
+      __ mov(rax, r9);  // r9 is also saved_rdi
+    __ movptr(saved_rdi, rdi);
+    __ movptr(saved_rsi, rsi);
+    __ mov(rdi, rcx); // c_rarg0
+    __ mov(rsi, rdx); // c_rarg1
+    __ mov(rdx, r8);  // c_rarg2
+    if (nargs >= 4)
+      __ mov(rcx, rax); // c_rarg3 (via rax)
+#else
+    assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
+           "unexpected argument registers");
+#endif
+  }
+
+  void restore_arg_regs() {
+    const Register saved_rdi = r9;
+    const Register saved_rsi = r10;
+#ifdef _WIN64
+    __ movptr(rdi, saved_rdi);
+    __ movptr(rsi, saved_rsi);
+#endif
+  }
+
+  // Generate code for an array write pre barrier
+  //
+  //     addr    -  starting address
+  //     count    -  element count
+  //
+  //     Destroy no registers!
+  //
+  void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
+    BarrierSet* bs = Universe::heap()->barrier_set();
+    switch (bs->kind()) {
+      case BarrierSet::G1SATBCT:
+      case BarrierSet::G1SATBCTLogging:
+        {
+          __ pusha();                      // push registers
+          if (count == c_rarg0) {
+            if (addr == c_rarg1) {
+              // exactly backwards!!
+              __ xchgptr(c_rarg1, c_rarg0);
+            } else {
+              __ movptr(c_rarg1, count);
+              __ movptr(c_rarg0, addr);
+            }
+
+          } else {
+            __ movptr(c_rarg0, addr);
+            __ movptr(c_rarg1, count);
+          }
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
+          __ popa();
+        }
+        break;
+      case BarrierSet::CardTableModRef:
+      case BarrierSet::CardTableExtension:
+      case BarrierSet::ModRef:
+        break;
+      default:
+        ShouldNotReachHere();
+
+    }
+  }
+
+  //
+  // Generate code for an array write post barrier
+  //
+  //  Input:
+  //     start    - register containing starting address of destination array
+  //     end      - register containing ending address of destination array
+  //     scratch  - scratch register
+  //
+  //  The input registers are overwritten.
+  //  The ending address is inclusive.
+  void  gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
+    assert_different_registers(start, end, scratch);
+    BarrierSet* bs = Universe::heap()->barrier_set();
+    switch (bs->kind()) {
+      case BarrierSet::G1SATBCT:
+      case BarrierSet::G1SATBCTLogging:
+
+        {
+          __ pusha();                      // push registers (overkill)
+          // must compute element count unless barrier set interface is changed (other platforms supply count)
+          assert_different_registers(start, end, scratch);
+          __ lea(scratch, Address(end, wordSize));
+          __ subptr(scratch, start);
+          __ shrptr(scratch, LogBytesPerWord);
+          __ mov(c_rarg0, start);
+          __ mov(c_rarg1, scratch);
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
+          __ popa();
+        }
+        break;
+      case BarrierSet::CardTableModRef:
+      case BarrierSet::CardTableExtension:
+        {
+          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+          Label L_loop;
+
+           __ shrptr(start, CardTableModRefBS::card_shift);
+           __ shrptr(end, CardTableModRefBS::card_shift);
+           __ subptr(end, start); // number of bytes to copy
+
+          intptr_t disp = (intptr_t) ct->byte_map_base;
+          if (__ is_simm32(disp)) {
+            Address cardtable(noreg, noreg, Address::no_scale, disp);
+            __ lea(scratch, cardtable);
+          } else {
+            ExternalAddress cardtable((address)disp);
+            __ lea(scratch, cardtable);
+          }
+
+          const Register count = end; // 'end' register contains bytes count now
+          __ addptr(start, scratch);
+        __ BIND(L_loop);
+          __ movb(Address(start, count, Address::times_1), 0);
+          __ decrement(count);
+          __ jcc(Assembler::greaterEqual, L_loop);
+        }
+        break;
+      default:
+        ShouldNotReachHere();
+
+    }
+  }
+
+
+  // Copy big chunks forward
+  //
+  // Inputs:
+  //   end_from     - source arrays end address
+  //   end_to       - destination array end address
+  //   qword_count  - 64-bits element count, negative
+  //   to           - scratch
+  //   L_copy_32_bytes - entry label
+  //   L_copy_8_bytes  - exit  label
+  //
+  void copy_32_bytes_forward(Register end_from, Register end_to,
+                             Register qword_count, Register to,
+                             Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
+    DEBUG_ONLY(__ stop("enter at entry label, not here"));
+    Label L_loop;
+    __ align(16);
+  __ BIND(L_loop);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+
+    } else {
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
+      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
+      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    }
+  __ BIND(L_copy_32_bytes);
+    __ addptr(qword_count, 4);
+    __ jcc(Assembler::lessEqual, L_loop);
+    __ subptr(qword_count, 4);
+    __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
+  }
+
+
+  // Copy big chunks backward
+  //
+  // Inputs:
+  //   from         - source arrays address
+  //   dest         - destination array address
+  //   qword_count  - 64-bits element count
+  //   to           - scratch
+  //   L_copy_32_bytes - entry label
+  //   L_copy_8_bytes  - exit  label
+  //
+  void copy_32_bytes_backward(Register from, Register dest,
+                              Register qword_count, Register to,
+                              Label& L_copy_32_bytes, Label& L_copy_8_bytes) {
+    DEBUG_ONLY(__ stop("enter at entry label, not here"));
+    Label L_loop;
+    __ align(16);
+  __ BIND(L_loop);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+      __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
+      __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
+
+    } else {
+      __ movq(to, Address(from, qword_count, Address::times_8, 24));
+      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 16));
+      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
+      __ movq(to, Address(from, qword_count, Address::times_8,  8));
+      __ movq(Address(dest, qword_count, Address::times_8,  8), to);
+      __ movq(to, Address(from, qword_count, Address::times_8,  0));
+      __ movq(Address(dest, qword_count, Address::times_8,  0), to);
+    }
+  __ BIND(L_copy_32_bytes);
+    __ subptr(qword_count, 4);
+    __ jcc(Assembler::greaterEqual, L_loop);
+    __ addptr(qword_count, 4);
+    __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
+  }
+*/
+
+  //
+  //  Generate store check for array
+  //
+  //  Input:
+  //     %edi    -  starting address
+  //     %ecx    -  element count
+  //
+  //  The 2 input registers are overwritten
+  //
+ 
+   //
+  //  Generate store check for array
+  //
+  //  Input:
+  //     T4    -  starting address(edi)
+  //     T5    -  element count  (ecx)
+  //
+  //  The 2 input registers are overwritten
+  //
+ 
+
+	void array_store_check() {
+		BarrierSet* bs = Universe::heap()->barrier_set();
+		assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+		CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+		assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+		Label l_0;
+
+		// __ leal(ecx, Address(edi, ecx, Address::times_4, -4));
+		__ sll(AT, T5, Address::times_4); 
+		__ add(AT, T4, AT); 
+		__ lea(T5, Address(AT, -4)); 
+
+		// __ shrl(edi, CardTableModRefBS::card_shift);
+		__ shr(T4, CardTableModRefBS::card_shift); 
+		//__ shrl(ecx, CardTableModRefBS::card_shift);
+		__ shr(T5, CardTableModRefBS::card_shift);
+
+		// __ subl(ecx, edi);
+		__ sub(T5, T5, T4);
+		__ bind(l_0);
+		//    __ movb(Address(edi, ecx, Address::times_1, (int)ct->byte_map_base), 0);
+		__ add(AT, T4, T5); 
+		__ sw(ZERO, AT, (int)ct->byte_map_base); 
+		//__ decl(ecx);
+		__ addi(T5, T5, -4);  
+		// __ jcc(Assembler::greaterEqual, l_0);
+		__ bgez(T5, l_0);
+		__ delayed()->nop(); 
+	}
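+
+	// Intent of array_store_check above (sketch): dirty every card spanned by
+	// the destination range, i.e. roughly
+	//   for (addr = start; addr <= start + (count - 1) * 4; addr += card_size)
+	//     ct->byte_map_base[(uintptr_t)addr >> CardTableModRefBS::card_shift] = 0;
+	// (the emitted loop steps the card index by 4 and stores whole zero words).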
+
+// Arguments:
+//   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+//             ignored
+//   name    - stub name string
+//
+// Inputs:
+//   c_rarg0   - source array address
+//   c_rarg1   - destination array address
+//   c_rarg2   - element count, treated as ssize_t, can be zero
+//
+// If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+// we let the hardware handle it.  The one to eight bytes within words,
+// dwords or qwords that span cache line boundaries will still be loaded
+// and stored atomically.
+//
+// Side Effects:
+//   disjoint_byte_copy_entry is set to the no-overlap entry point
+//   used by generate_conjoint_byte_copy().
+//
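+// Copy strategy of the stub below (sketch):
+//   if (count > 3 && !aligned)  copy 0-3 prefix bytes until 'from' is word aligned;
+//   copy the bulk four bytes at a time (lw/sw);
+//   copy the remaining 0-3 suffix bytes one at a time (lb/sb).
+//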
+address generate_disjoint_byte_copy(bool aligned, const char *name) {
+				StubCodeMark mark(this, "StubRoutines", name);
+				__ align(CodeEntryAlignment);
+				address start = __ pc();
+				Label l_0, l_1, l_2, l_3, l_4, l_5, l_6;
+
+				//    __ pushl(esi);
+				__ push(T3);
+				__ push(T4);
+				__ push(T5);
+				__ push(T8);
+				//   __ movl(ecx, Address(esp, 4+12));      // count
+				__ move(T5, A2);  
+				//  __ pushl(edi);
+				// __ movl(esi, Address(esp, 8+ 4));      // from
+				__ move(T3, A0); 
+				//  __ movl(edi, Address(esp, 8+ 8));      // to
+
+				__ move(T4, A1);
+				// copy from low to high
+				//__ movl(eax, ecx);            // original count in eax
+				__ move(T8, T5);             // original count in T5
+				//__ cmpl(ecx, 3);
+				__ addi(AT, T5, -3 ); 
+				//__ jcc(Assembler::belowEqual, l_4);                   // <= 3 bytes
+				__ blez(AT, l_4);  
+				__ delayed()->nop();	
+				if (!aligned) {
+								// align source address at dword address boundary
+								// __ movl(ecx, 4);
+								__ move(T5, 4); 
+								//	__ subl(ecx, esi);
+								__ sub(T5, T5, T3); 
+		  //__ andl(ecx, 3);              // prefix byte count
+		  __ andi(T5, T5, 3); 
+		  //	__ jcc(Assembler::equal, l_1);                   // no prefix
+		  __ beq(T5, ZERO, l_1); 
+		  __ delayed()->nop();	
+		  //	__ subl(eax, ecx);            // byte count less prefix
+		  __ sub(T8,T8,T5); 
+		  // copy prefix
+		  __ bind(l_0);
+		  //     __ movb(edx, Address(esi));
+		  __ lb(AT, T3, 0); 
+		  //  __ movb(Address(edi), edx);
+		  __ sb(AT, T4, 0); 
+		  // __ incl(esi);
+		  __ addi(T3, T3, 1); 
+		  // __ incl(edi);
+		  __ addi(T4, T4, 1); 
+		  //__ decl(ecx);
+		  __ addi(T5, T5, -1);   // decrement prefix byte count
+		  // __ jcc(Assembler::notEqual,l_0);
+		  __ bne(T5, ZERO, l_0); 
+		  __ delayed()->nop(); 
+		  __ bind(l_1);
+		  //   __ movl(ecx, eax);            // byte count less prefix
+		  __ move(T5, T8); 
+	  }
+	  //__ shrl(ecx, 2);              // dword count
+	  __ shr(T5, 2); 
+	  // __ jcc(Assembler::equal, l_4);                   // no dwords to move
+	  __ beq(T5, ZERO, l_4); 
+	  __ delayed()->nop(); 
+	  /*  // __ cmpl(ecx, 32);
+	      __ addi(AT, T5, -32); 
+	  // __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
+	  __ blez(ZERO, l_2);  
+	  __ delayed()->nop(); 
+	  // copy aligned dwords
+	  __ rep_movl();
+
+	  //__ jmp(l_4);
+	  __ b(l_4);  
+	  */ 
+	  // copy aligned dwords
+	  __ bind(l_2);
+	  //__ subl(edi, esi);            // edi := to - from
+	  // __ sub(T4, T4, T3);  
+	  __ align(16);
+	  __ bind(l_3);
+	  //__ movl(edx, Address(esi));
+	  __ lw(AT, T3, 0);   
+	  // __ movl(Address(edi, esi, Address::times_1), edx);
+	  __ sw(AT, T4, 0 ); 
+	  //__ addl(esi, 4);
+	  __ addi(T3, T3, 4); 
+	  __ addi(T4, T4, 4); 
+	  // __ decl(ecx);
+	  __ addi(T5, T5, -1); 
+	  //__ jcc(Assembler::notEqual, l_3);
+	  __ bne(T5, ZERO, l_3); 
+	  __ delayed()->nop(); 
+	  // __ addl(edi, esi);            // restore edi to "to" pointer
+	  __ bind(l_4);
+	  // __ movl(ecx, eax);
+	  __ move(T5, T8); 
+	  //__ andl(ecx, 3);              // suffix byte count
+	  __ andi(T5, T5, 3); 
+	  // __ jcc(Assembler::equal, l_6);                   // no suffix
+	  __ beq(T5, ZERO, l_6);  
+	  __ delayed()->nop(); 
+	  // copy suffix
+	  __ bind(l_5);
+	  //    __ movb(edx, Address(esi));
+	  __ lb(AT, T3, 0); 
+	  //  __ movb(Address(edi),edx);
+	  __ sb(AT, T4, 0); 
+	  //__ incl(esi);
+	  __ addi(T3, T3, 1);  
+	  //__ incl(edi);
+	  __ addi(T4, T4, 1);  
+	  //   __ decl(ecx);
+	  __ addi(T5, T5, -1); 
+	  //__ jcc(Assembler::notEqual, l_5);
+	  __ bne(T5, ZERO, l_5 ); 
+	  __ delayed()->nop(); 
+	  __ bind(l_6);
+	  //  __ popl(edi);
+	  // __ popl(esi);
+	  __ pop(T8); 
+	  __ pop(T5); 
+	  __ pop(T4); 
+	  __ pop(T3); 
+	  //__ ret(0);
+	  __ jr(RA); 
+	  __ delayed()->nop(); 
+	  return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+  // we let the hardware handle it.  The one to eight bytes within words,
+  // dwords or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  address generate_conjoint_byte_copy(bool aligned, const char *name) {
+		Label l_1, l_2, l_3, l_4, l_5;
+		StubCodeMark mark(this, "StubRoutines", name);
+		__ align(CodeEntryAlignment);
+		address start = __ pc();
+		address nooverlap_target = aligned ?
+		StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
+		StubRoutines::jbyte_disjoint_arraycopy();
+
+		__ push(T3);	
+		__ push(T4);	
+		__ push(T5);	
+		__ push(T8);	
+
+		array_overlap_test(nooverlap_target, 0);
+
+		// copy from high to low
+		//   __ pushl(esi);
+		//   __ movl(ecx, Address(esp, 4+12));      // count
+		//  __ pushl(edi);
+		// __ movl(esi, Address(esp, 8+ 4));      // from
+		// __ movl(edi, Address(esp, 8+ 8));      // to
+		__ move(T5, A2);  
+		__ move(T3, A0); 
+		__ move(T4, A1);
+		// __ leal(esi, Address(esi, ecx, Address::times_1, -4));   // from + count - 4
+		__ add(AT, T3, T5);  
+		__ lea(T3, Address(AT, -4));
+		//	__ std();
+		//   __ leal(edi, Address(edi, ecx, Address::times_1, -4));   // to + count - 4
+		__ add(AT, T4, T5);  
+		__ lea(T4, Address(AT, -4));
+		//	__ movl(eax, ecx);
+		__ move(T8, T5); 
+		//	__ cmpl(ecx, 3);
+		__ addi(AT, T5, -3); 
+		//	__ jcc(Assembler::belowEqual, l_3);                   // <= 3 bytes
+		__ blez(AT, l_3); 
+		__ delayed()->nop();	
+		//	__ shrl(ecx, 2);              // dword count
+		__ shr(T5, 2); 
+		/*	__ cmpl(ecx, 32);
+				__ jcc(Assembler::above, l_2);                   // > 32 dwords
+		 */     // copy dwords aligned or not in a loop
+		// __ subl(edi, esi);
+		__ align(16);
+		__ bind(l_1);
+		// __ movl(edx, Address(esi));
+		__ lw(AT, T3, 0);   
+		//__ movl(Address(edi, esi, Address::times_1), edx);
+		__ sw(AT, T4, 0); 
+		//__ subl(esi, 4);
+		__ addi(T3, T3, -4);    
+		__ addi(T4, T4, -4);    
+		//__ decl(ecx);
+		__ addi(T5, T5, -1);  
+		//__ jcc(Assembler::notEqual, l_1);
+		__ bne(T5, ZERO, l_1); 
+		__ delayed()->nop(); 
+		//__ addl(edi, esi);
+		// __ jmp(l_3);
+		__ b(l_3);  
+		__ delayed()->nop(); 
+		// copy dwords aligned or not with repeat move
+		__ bind(l_2);
+		//    __ rep_movl();
+		__ bind(l_3);
+		// copy suffix (0-3 bytes)
+		//   __ andl(eax, 3);              // suffix byte count
+		__ andi(T8, T8, 3); 
+		//__ jcc(Assembler::equal, l_5);                   // no suffix
+		__ beq(T8, ZERO, l_5); 
+		__ delayed()->nop(); 
+		//   __ subl(edi, esi);
+		//   __ sub(T4, T4, T3);  
+		//__ addl(esi, 3);
+		__ addi(T3, T3, 3); 
+		__ bind(l_4);
+		// __ movb(edx, Address(esi));
+		__ lb(AT, T3, 0);  
+		//  __ movb(Address(esi, edi, Address::times_1), edx);
+		__ sb(AT, T4, 0); 
+		//__ decl(esi);
+		//__ decl(eax);
+		__ addi(T3, T3, -1);  
+		__ addi(T4, T4, -1);  
+		__ addi(T5, T5, -1); 
+		// __ jcc(Assembler::notEqual, l_4);
+		__ bne(T5, ZERO, l_4); 
+		__ delayed()->nop(); 
+		__ bind(l_5);
+		// __ cld();
+		//__ popl(edi);
+		//__ popl(esi);
+		//__ ret(0);
+		__ pop(T8);	
+		__ pop(T5);	
+		__ pop(T4);	
+		__ pop(T3);	
+		__ jr(RA); 
+		__ delayed()->nop(); 
+		return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_short_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_short_copy().
+  //
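+  // For reference only (not generated code): a minimal C sketch of the
+  // intended semantics, assuming the JNI jshort typedef and non-overlapping
+  // arrays:
+  //
+  //   static void disjoint_short_copy(jshort* from, jshort* to, ssize_t count) {
+  //     for (ssize_t i = 0; i < count; i++)   // low to high, 2 bytes each
+  //       to[i] = from[i];
+  //   }
+  //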
+  address generate_disjoint_short_copy(bool aligned, const char *name) {
+		Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8;
+		StubCodeMark mark(this, "StubRoutines", name);
+		__ align(CodeEntryAlignment);
+		address start = __ pc();
+
+	/*     __ pushl(esi);
+	       __ movl(ecx, Address(esp, 4+12));      // count
+	       __ pushl(edi);
+	       __ movl(esi, Address(esp, 8+ 4));      // from
+	       __ movl(edi, Address(esp, 8+ 8));      // to
+	       */
+		__ push(T3);	
+		__ push(T4);	
+		__ push(T5);	
+		__ push(T8);	
+		__ move(T5, A2);  
+		__ move(T3, A0); 
+		__ move(T4, A1);
+
+		if (!aligned) {
+		//  __ testl(ecx, ecx);
+		// __ jcc(Assembler::equal, l_5); // nothing to do
+			__ beq(T5, ZERO, l_5);
+			__ delayed()->nop(); 
+			// align source address at dword address boundary
+			//__ movl(eax, esi);            // original from
+			__ move(T8, T3); 
+			//__ andl(eax, 3);              // either 0 or 2
+			__ andi(T8, T8, 3); 
+			//	__ jcc(Assembler::equal, l_1);                   // no prefix
+			__ beq(T8, ZERO, l_1); 
+			__ delayed()->nop();
+			// copy prefix
+			// __ movw(edx, Address(esi));
+			__ lh(AT, T3, 0);
+			//	__ movw(Address(edi), edx);
+			__ sh(AT, T4, 0); 
+			//	__ addl(esi, eax);            // eax == 2
+			__ add(T3, T3, T8); 
+			//	__ addl(edi, eax);
+			__ add(T4, T4, T8);
+			//  __ decl(ecx);
+			__ addi(T5, T5, -1); 
+			__ bind(l_1);
+		}
+		//  __ movl(eax, ecx);            // word count less prefix
+		__ move(T8, T5);            // word count less prefix
+		//    __ sarl(ecx, 1);              // dword count
+		__ sra(T5, T5, 1); 
+		//__ jcc(Assembler::equal, l_4);                   // no dwords to move
+		__ beq(T5, ZERO, l_4); 
+		__ delayed()->nop(); 
+		/*   __ cmpl(ecx, 32);
+				 __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
+		// copy aligned dwords
+		__ rep_movl();
+		__ jmp(l_4 );
+		 */      // copy aligned dwords
+		__ bind(l_2);
+		// __ subl(edi, esi);
+		__ align(16);
+		__ bind(l_3);
+		//__ movl(edx, Address(esi));
+		__ lw(AT, T3, 0);   
+		//__ movl(Address(edi, esi, Address::times_1), edx);
+		__ sw(AT, T4, 0 ); 
+		//  __ addl(esi, 4);
+		__ addi(T3, T3, 4); 
+		__ addi(T4, T4, 4); 
+		// __ decl(ecx);
+		__ addi(T5, T5, -1); 
+		//    __ jcc(Assembler::notEqual, l_3);
+		__ bne(T5, ZERO, l_3); 
+		__ delayed()->nop(); 
+		//    __ addl(edi, esi);
+		__ bind(l_4);
+		//    __ andl(eax, 1);              // suffix count
+		__ andi(T8, T8, 1); 
+		//   __ jcc(Assembler::equal, l_5);                   // no suffix
+		__ beq(T8, ZERO, l_5);  
+		__ delayed()->nop(); 
+		// copy suffix
+		//     __ movw(edx, Address(esi));
+		__ lh(AT, T3, 0); 
+		//  __ movw(Address(edi), edx);
+		__ sh(AT, T4, 0); 
+		__ bind(l_5);
+		//   __ popl(edi);
+		//    __ popl(esi);
+		//   __ ret(0);
+		__ pop(T8);	
+		__ pop(T5);	
+		__ pop(T4);	
+		__ pop(T3);	
+		__ jr(RA); 
+		__ delayed()->nop();  
+		return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
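+  // For reference only (not generated code): a minimal C sketch of the
+  // backward-copy path below, assuming the JNI jshort typedef.  As with the
+  // byte stub, array_overlap_test() branches to the disjoint stub when the
+  // ranges do not overlap harmfully:
+  //
+  //   static void conjoint_short_copy(jshort* from, jshort* to, ssize_t count) {
+  //     while (count-- > 0)          // high to low; safe when to > from
+  //       to[count] = from[count];
+  //   }
+  //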
+  address generate_conjoint_short_copy(bool aligned, const char *name) {
+		Label l_1, l_2, l_3, l_4, l_5;
+		StubCodeMark mark(this, "StubRoutines", name);
+		__ align(CodeEntryAlignment);
+		address start = __ pc();
+		address nooverlap_target = aligned ?
+						StubRoutines::arrayof_jshort_disjoint_arraycopy() :
+						StubRoutines::jshort_disjoint_arraycopy();
+		__ push(T3);	
+		__ push(T4);	
+		__ push(T5);	
+		__ push(T8);	
+
+		array_overlap_test(nooverlap_target, 1);
+		/*
+			 __ pushl(esi);
+			 __ movl(ecx, Address(esp, 4+12));      // count
+			 __ pushl(edi);
+			 __ movl(esi, Address(esp, 8+ 4));      // from
+			 __ movl(edi, Address(esp, 8+ 8));      // to
+		 */ 
+		__ move(T5, A2);  
+		__ move(T3, A0); 
+		__ move(T4, A1);
+
+
+		// copy dwords from high to low
+		// __ leal(esi, Address(esi, ecx, Address::times_2, -4)); // from + count*2 - 4
+		__ sll(AT, T5, Address::times_2); 
+		__ add(AT, T3, AT); 
+		__ lea(T3, Address( AT, -4)); 
+		//__ std();
+		//__ leal(edi, Address(edi, ecx, Address::times_2, -4)); // to + count*2 - 4
+		__ sll(AT,T5 , Address::times_2); 
+		__ add(AT, T4, AT); 
+		__ lea(T4, Address( AT, -4)); 
+		//  __ movl(eax, ecx);
+		__ move(T8, T5); 
+		__ bind(l_1);
+		//   __ sarl(ecx, 1);              // dword count
+		__ sra(T5,T5, 1); 
+		//__ jcc(Assembler::equal, l_4);                   // no dwords to move
+		__ beq(T5, ZERO, l_4);  
+		__ delayed()->nop(); 
+		/*    __ cmpl(ecx, 32);
+					__ jcc(Assembler::above, l_3);                   // > 32 dwords
+		// copy dwords with loop
+		__ subl(edi, esi);
+		 */     __ align(16);
+		__ bind(l_2);
+		//__ movl(edx, Address(esi));
+		__ lw(AT, T3, 0);   
+		//__ movl(Address(edi, esi, Address::times_1), edx);
+		__ sw(AT, T4, 0); 
+		//__ subl(esi, 4);
+		__ addi(T3, T3, -4); 
+		__ addi(T4, T4, -4); 
+		//__ decl(ecx);
+		__ addi(T5, T5, -1); 
+		//  __ jcc(Assembler::notEqual, l_2);
+		__ bne(T5, ZERO, l_2); 
+		__ delayed()->nop(); 
+		//  __ addl(edi, esi);
+		// __ jmp(l_4);
+		__ b(l_4);
+		__ delayed()->nop();
+		// copy dwords with repeat move
+		__ bind(l_3);
+		//   __ rep_movl();
+		__ bind(l_4);
+		//  __ andl(eax, 1);              // suffix count
+		__ andi(T8, T8, 1);              // suffix count
+		//__ jcc(Assembler::equal, l_5);                   // no suffix
+		__ beq(T8, ZERO, l_5 );  
+		__ delayed()->nop(); 
+		// copy suffix
+		//   __ movw(edx, Address(esi, 2));
+		__ lh(AT, T3, 2); 
+		//  __ movw(Address(edi, 2), edx);
+		__ sh(AT, T4, 2); 
+		__ bind(l_5);
+		//    __ cld();
+		//    __ popl(edi);
+		//    __ popl(esi);
+		//   __ ret(0);
+		__ pop(T8);	
+		__ pop(T5);	
+		__ pop(T4);	
+		__ pop(T3);	
+		__ jr(RA); 
+		__ delayed()->nop();   
+		return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_int_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_int_oop_copy().
+  //
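+  // For reference only (not generated code): a minimal C sketch of the
+  // intended semantics, assuming the JNI jint typedef and non-overlapping
+  // arrays.  For the oop variant (is_oop == true) the stub additionally runs
+  // array_store_check() over the destination after the copy:
+  //
+  //   static void disjoint_int_copy(jint* from, jint* to, ssize_t count) {
+  //     for (ssize_t i = 0; i < count; i++)   // low to high, 4 bytes each
+  //       to[i] = from[i];
+  //   }
+  //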
+  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
+		Label l_2, l_3, l_4, l_stchk;
+		StubCodeMark mark(this, "StubRoutines", name);
+		__ align(CodeEntryAlignment);
+		address start = __ pc();
+		/*
+			 __ pushl(esi);
+			 __ movl(ecx, Address(esp, 4+12));      // count
+			 __ pushl(edi);
+			 __ movl(esi, Address(esp, 8+ 4));      // from
+			 __ movl(edi, Address(esp, 8+ 8));      // to
+		 */
+		__ push(T3);	
+		__ push(T4);	
+		__ push(T5);	
+		__ push(T8);	
+		__ move(T5, A2);  
+		__ move(T3, A0); 
+		__ move(T4, A1);
+
+
+		// __ cmpl(ecx, 32);
+		// __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
+		// __ rep_movl();
+		__ b(l_2); 	
+		__ delayed()->nop();	
+		if (is_oop) {
+		//  __ jmp(l_stchk);
+			__ b(l_stchk); 
+			__ delayed()->nop(); 
+		}
+		//    __ popl(edi);
+		//   __ popl(esi);
+		//  __ ret(0);
+		__ pop(T8);	
+		__ pop(T5);	
+		__ pop(T4);	
+		__ pop(T3);	
+		__ jr(RA); 
+		__ delayed()->nop(); 
+
+		__ bind(l_2);
+		//  __ subl(edi, esi);
+		//  __ testl(ecx, ecx);
+		// __ jcc(Assembler::zero, l_4);
+		__ beq(T5, ZERO, l_4);  
+		__ delayed()->nop(); 
+		__ align(16);
+		__ bind(l_3);
+		//__ movl(edx, Address(esi));
+		__ lw(AT, T3, 0);   
+		// __ movl(Address(edi, esi, Address::times_1), edx);
+		__ sw(AT, T4, 0); 
+		// __ addl(esi, 4);
+		__ addi(T3, T3, 4);
+		__ addi(T4, T4, 4);
+		//   __ decl(ecx);
+		__ addi(T5, T5, -1); 
+		//    __ jcc(Assembler::notEqual, l_3);
+		__ bne(T5, ZERO, l_3); 
+		__ delayed()->nop(); 
+		if (is_oop) {
+			__ bind(l_stchk);
+			//      __ movl(edi, Address(esp, 8+ 8));
+			//     __ movl(ecx, Address(esp, 8+ 12));
+			__ move(T4, A1); 
+			__ move(T5, A2); 
+			array_store_check();
+		}
+		__ bind(l_4);
+		//    __ popl(edi);
+		//   __ popl(esi);
+		//  __ ret(0);
+		__ pop(T8);
+		__ pop(T5);
+		__ pop(T4);
+		__ pop(T3);
+		__ jr(RA); 
+		__ delayed()->nop(); 
+		return start;
+	}
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
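+  // For reference only (not generated code): a minimal C sketch of the
+  // backward-copy path below, assuming the JNI jint typedef; the oop variant
+  // again finishes with array_store_check() on the destination:
+  //
+  //   static void conjoint_int_copy(jint* from, jint* to, ssize_t count) {
+  //     while (count-- > 0)          // high to low; safe when to > from
+  //       to[count] = from[count];
+  //   }
+  //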
+  address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
+		Label l_2, l_3, l_4, l_stchk;
+		StubCodeMark mark(this, "StubRoutines", name);
+		__ align(CodeEntryAlignment);
+		address start = __ pc();
+		address nooverlap_target;
+
+		if (is_oop) {
+			nooverlap_target = aligned ?
+							StubRoutines::arrayof_oop_disjoint_arraycopy() :
+							StubRoutines::oop_disjoint_arraycopy();
+		}else {
+			nooverlap_target = aligned ?
+							StubRoutines::arrayof_jint_disjoint_arraycopy() :
+							StubRoutines::jint_disjoint_arraycopy();
+		}
+		__ push(T3);
+		__ push(T4);
+		__ push(T5);
+		__ push(T8);
+
+		array_overlap_test(nooverlap_target, 2);
+		/*
+			 __ pushl(esi);
+			 __ movl(ecx, Address(esp, 4+12));      // count
+			 __ pushl(edi);
+			 __ movl(esi, Address(esp, 8+ 4));      // from
+			 __ movl(edi, Address(esp, 8+ 8));      // to
+		 */ 
+		__ move(T5, A2);  
+		__ move(T3, A0); 
+		__ move(T4, A1);
+
+		//__ leal(esi, Address(esi, ecx, Address::times_4, -4)); // from + count*4 - 4
+		__ sll(AT, T5, Address::times_4); 
+		__ add(AT, T3, AT); 
+		__ lea(T3 , Address(AT, -4)); 
+		//__ std();
+		//__ leal(edi, Address(edi, ecx, Address::times_4, -4)); // to + count*4 - 4
+		__ sll(AT, T5, Address::times_4); 
+		__ add(AT, T4, AT); 
+		__ lea(T4 , Address(AT, -4)); 
+
+		//    __ cmpl(ecx, 32);
+		//   __ jcc(Assembler::above, l_3);                   // > 32 dwords
+		//  __ testl(ecx, ecx);
+		//__ jcc(Assembler::zero, l_4);
+		__ beq(T5, ZERO, l_4); 
+		__ delayed()->nop();  
+		// __ subl(edi, esi);
+		__ align(16);
+		__ bind(l_2);
+		// __ movl(edx, Address(esi));
+		__ lw(AT, T3, 0);   
+		// __ movl(Address(esi, edi, Address::times_1), edx);
+		__ sw(AT, T4, 0); 
+		// __ subl(esi, 4);
+		__ addi(T3, T3, -4); 
+		__ addi(T4, T4, -4); 
+		//   __ decl(ecx);
+		__ addi(T5, T5, -1); 
+		//__ jcc(Assembler::notEqual, l_2);
+		__ bne(T5, ZERO, l_2);  
+		__ delayed()->nop(); 
+		if (is_oop) {
+			// __ jmp(l_stchk);
+			__ b( l_stchk); 
+			__ delayed()->nop(); 
+		}
+		__ bind(l_4);
+		//      __ cld();
+		//     __ popl(edi);
+		//    __ popl(esi);
+		//   __ ret(0);
+		__ pop(T8); 
+		__ pop(T5); 
+		__ pop(T4); 
+		__ pop(T3); 
+		__ jr(RA); 
+		__ delayed()->nop(); 
+		__ bind(l_3);
+		//   __ rep_movl();
+		if (is_oop) {
+			__ bind(l_stchk);
+			//  __ movl(edi, Address(esp, 8+ 8));
+			__ move(T4, A1);  
+			// __ movl(ecx, Address(esp, 8+ 12));
+			__ move(T5, A2);  
+			array_store_check();
+		}
+		//    __ cld();
+		//   __ popl(edi);
+		//   __ popl(esi);
+		//  __ ret(0);
+		__ pop(T8);	
+		__ pop(T5);	
+		__ pop(T4);	
+		__ pop(T3);	
+		__ jr(RA);	
+		__ delayed()->nop(); 
+		return start;
+  }
+#if 0
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // Side Effects:
+  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
+  //
+  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
+    const Register from        = rdi;  // source array address
+    const Register to          = rsi;  // destination array address
+    const Register qword_count = rdx;  // elements count
+    const Register end_from    = from; // source array end address
+    const Register end_to      = rcx;  // destination array end address
+    const Register saved_to    = to;
+    // End pointers are inclusive, and if count is not zero they point
+    // to the last unit copied:  end_to[0] := end_from[0]
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    // Save no-overlap entry point for generate_conjoint_long_oop_copy()
+    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
+
+    if (is_oop) {
+      disjoint_oop_copy_entry  = __ pc();
+      // no registers are destroyed by this call
+      gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
+    } else {
+      disjoint_long_copy_entry = __ pc();
+    }
+    BLOCK_COMMENT("Entry:");
+    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+
+    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
+                      // r9 and r10 may be used to save non-volatile registers
+
+    // 'from', 'to' and 'qword_count' are now valid
+
+    // Copy from low to high addresses.  Use 'to' as scratch.
+    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+    __ negptr(qword_count);
+    __ jmp(L_copy_32_bytes);
+
+    // Copy trailing qwords
+  __ BIND(L_copy_8_bytes);
+    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+    __ increment(qword_count);
+    __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+    if (is_oop) {
+      __ jmp(L_exit);
+    } else {
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
+      restore_arg_regs();
+      __ xorptr(rax, rax); // return 0
+      __ leave(); // required for proper stackwalking of RuntimeStub frame
+      __ ret(0);
+    }
+
+    // Copy in 32-byte chunks
+    copy_32_bytes_forward(end_from, end_to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+
+    if (is_oop) {
+    __ BIND(L_exit);
+      gen_write_ref_array_post_barrier(saved_to, end_to, rax);
+      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
+    } else {
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
+    }
+    restore_arg_regs();
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   is_oop  - true => oop array, so generate store check code
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
+    const Register from        = rdi;  // source array address
+    const Register to          = rsi;  // destination array address
+    const Register qword_count = rdx;  // elements count
+    const Register saved_count = rcx;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
+
+    address disjoint_copy_entry = NULL;
+    if (is_oop) {
+      assert(!UseCompressedOops, "shouldn't be called for compressed oops");
+      disjoint_copy_entry = disjoint_oop_copy_entry;
+      oop_copy_entry  = __ pc();
+      array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
+    } else {
+      disjoint_copy_entry = disjoint_long_copy_entry;
+      long_copy_entry = __ pc();
+      array_overlap_test(disjoint_long_copy_entry, Address::times_8);
+    }
+    BLOCK_COMMENT("Entry:");
+    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+
+    array_overlap_test(disjoint_copy_entry, Address::times_8);
+    setup_arg_regs(); // from => rdi, to => rsi, count => rdx
+                      // r9 and r10 may be used to save non-volatile registers
+
+    // 'from', 'to' and 'qword_count' are now valid
+
+    if (is_oop) {
+      // Save to and count for store barrier
+      __ movptr(saved_count, qword_count);
+      // No registers are destroyed by this call
+      gen_write_ref_array_pre_barrier(to, saved_count);
+    }
+
+    __ jmp(L_copy_32_bytes);
+
+    // Copy trailing qwords
+  __ BIND(L_copy_8_bytes);
+    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+    __ decrement(qword_count);
+    __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+    if (is_oop) {
+      __ jmp(L_exit);
+    } else {
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
+      restore_arg_regs();
+      __ xorptr(rax, rax); // return 0
+      __ leave(); // required for proper stackwalking of RuntimeStub frame
+      __ ret(0);
+    }
+
+    // Copy in 32-byte chunks
+    copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
+
+    if (is_oop) {
+    __ BIND(L_exit);
+      __ lea(rcx, Address(to, saved_count, Address::times_8, -8));
+      gen_write_ref_array_post_barrier(to, rcx, rax);
+      inc_counter_np(SharedRuntime::_oop_array_copy_ctr);
+    } else {
+      inc_counter_np(SharedRuntime::_jlong_array_copy_ctr);
+    }
+    restore_arg_regs();
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
+  // Helper for generating a dynamic type check.
+  // Smashes no registers.
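+  // For reference only (not generated code): the check performed below, in
+  // pseudo-C; linear_scan() stands in for the repne_scan loop over the
+  // secondary supers array:
+  //
+  //   if (sub_klass == super_klass)                            goto L_success;
+  //   if (*(sub_klass + super_check_offset) == super_klass)    goto L_success;
+  //   if (super_check_offset != sc_offset)                     goto L_miss;
+  //   if (linear_scan(sub_klass->secondary_supers, super_klass)) {
+  //     sub_klass->secondary_super_cache = super_klass;        goto L_success;
+  //   }
+  //   // otherwise fall through to L_miss
+  //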
+  void generate_type_check(Register sub_klass,
+                           Register super_check_offset,
+                           Register super_klass,
+                           Label& L_success) {
+    assert_different_registers(sub_klass, super_check_offset, super_klass);
+
+    BLOCK_COMMENT("type_check:");
+
+    Label L_miss;
+
+    // a couple of useful fields in sub_klass:
+    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
+                     Klass::secondary_supers_offset_in_bytes());
+    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                     Klass::secondary_super_cache_offset_in_bytes());
+    Address secondary_supers_addr(sub_klass, ss_offset);
+    Address super_cache_addr(     sub_klass, sc_offset);
+
+    // if the pointers are equal, we are done (e.g., String[] elements)
+    __ cmpptr(super_klass, sub_klass);
+    __ jcc(Assembler::equal, L_success);
+
+    // check the supertype display:
+    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
+    __ cmpptr(super_klass, super_check_addr); // test the super type
+    __ jcc(Assembler::equal, L_success);
+
+    // if it was a primary super, we can just fail immediately
+    __ cmpl(super_check_offset, sc_offset);
+    __ jcc(Assembler::notEqual, L_miss);
+
+    // Now do a linear scan of the secondary super-klass chain.
+    // The repne_scan instruction uses fixed registers, which we must spill.
+    // (We need a couple more temps in any case.)
+    // This code is rarely used, so simplicity is a virtue here.
+    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
+    {
+      __ push(rax);
+      __ push(rcx);
+      __ push(rdi);
+      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
+
+      __ movptr(rdi, secondary_supers_addr);
+      // Load the array length.
+      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
+      // Skip to start of data.
+      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+      // Scan rcx words at [rdi] for occurrence of rax
+      // Set NZ/Z based on last compare
+      __ movptr(rax, super_klass);
+      if (UseCompressedOops) {
+        // Compare against compressed form.  Don't need to uncompress because
+        // looks like orig rax is restored in popq below.
+        __ encode_heap_oop(rax);
+        __ repne_scanl();
+      } else {
+        __ repne_scan();
+      }
+
+      // Unspill the temp. registers:
+      __ pop(rdi);
+      __ pop(rcx);
+      __ pop(rax);
+
+      __ jcc(Assembler::notEqual, L_miss);
+    }
+
+    // Success.  Cache the super we found and proceed in triumph.
+    __ movptr(super_cache_addr, super_klass); // note: rax is dead
+    __ jmp(L_success);
+
+    // Fall through on failure!
+    __ BIND(L_miss);
+  }
+
+  //
+  //  Generate checkcasting array copy stub
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - element count, treated as ssize_t, can be zero
+  //    c_rarg3   - size_t ckoff (super_check_offset)
+  // not Win64
+  //    c_rarg4   - oop ckval (super_klass)
+  // Win64
+  //    rsp+40    - oop ckval (super_klass)
+  //
+  //  Output:
+  //    rax ==  0  -  success
+  //    rax == -1^K - failure, where K is partial transfer count
+  //
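+  //  For reference only (not generated code): a rough sketch of the loop's
+  //  semantics; is_subtype_of() stands in for the inlined type check emitted
+  //  by generate_type_check() below:
+  //
+  //    for (K = 0; K < length; K++) {          // K = elements copied so far
+  //      oop elem = from[K];
+  //      if (elem != NULL && !elem->klass()->is_subtype_of(element_klass))
+  //        return ~K;                          // i.e. -1^K, partial transfer
+  //      to[K] = elem;                         // card marks emitted afterwards
+  //    }
+  //    return 0;                               // success
+  //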
+  address generate_checkcast_copy(const char *name) {
+
+    Label L_load_element, L_store_element, L_do_card_marks, L_done;
+
+    // Input registers (after setup_arg_regs)
+    const Register from        = rdi;   // source array address
+    const Register to          = rsi;   // destination array address
+    const Register length      = rdx;   // elements count
+    const Register ckoff       = rcx;   // super_check_offset
+    const Register ckval       = r8;    // super_klass
+
+    // Registers used as temps (r13, r14 are save-on-entry)
+    const Register end_from    = from;  // source array end address
+    const Register end_to      = r13;   // destination array end address
+    const Register count       = rdx;   // -(count_remaining)
+    const Register r14_length  = r14;   // saved copy of length
+    // End pointers are inclusive, and if length is not zero they point
+    // to the last unit copied:  end_to[0] := end_from[0]
+
+    const Register rax_oop    = rax;    // actual oop copied
+    const Register r11_klass  = r11;    // oop._klass
+
+    //---------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the two arrays are subtypes of Object[] but the
+    // destination array type is not equal to or a supertype
+    // of the source type.  Each element must be separately
+    // checked.
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    checkcast_copy_entry  = __ pc();
+    BLOCK_COMMENT("Entry:");
+
+#ifdef ASSERT
+    // caller guarantees that the arrays really are different
+    // otherwise, we would have to make conjoint checks
+    { Label L;
+      array_overlap_test(L, TIMES_OOP);
+      __ stop("checkcast_copy within a single array");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    // allocate spill slots for r13, r14
+    enum {
+      saved_r13_offset,
+      saved_r14_offset,
+      saved_rbp_offset,
+      saved_rip_offset,
+      saved_rarg0_offset
+    };
+    __ subptr(rsp, saved_rbp_offset * wordSize);
+    __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
+    __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
+    setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
+                       // ckoff => rcx, ckval => r8
+                       // r9 and r10 may be used to save non-volatile registers
+#ifdef _WIN64
+    // last argument (#4) is on stack on Win64
+    const int ckval_offset = saved_rarg0_offset + 4;
+    __ movptr(ckval, Address(rsp, ckval_offset * wordSize));
+#endif
+
+    // check that int operands are properly extended to size_t
+    assert_clean_int(length, rax);
+    assert_clean_int(ckoff, rax);
+
+#ifdef ASSERT
+    BLOCK_COMMENT("assert consistent ckoff/ckval");
+    // The ckoff and ckval must be mutually consistent,
+    // even though caller generates both.
+    { Label L;
+      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                        Klass::super_check_offset_offset_in_bytes());
+      __ cmpl(ckoff, Address(ckval, sco_offset));
+      __ jcc(Assembler::equal, L);
+      __ stop("super_check_offset inconsistent");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    // Loop-invariant addresses.  They are exclusive end pointers.
+    Address end_from_addr(from, length, TIMES_OOP, 0);
+    Address   end_to_addr(to,   length, TIMES_OOP, 0);
+    // Loop-variant addresses.  They assume post-incremented count < 0.
+    Address from_element_addr(end_from, count, TIMES_OOP, 0);
+    Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
+
+    gen_write_ref_array_pre_barrier(to, count);
+
+    // Copy from low to high addresses, indexed from the end of each array.
+    __ lea(end_from, end_from_addr);
+    __ lea(end_to,   end_to_addr);
+    __ movptr(r14_length, length);        // save a copy of the length
+    assert(length == count, "");          // else fix next line:
+    __ negptr(count);                     // negate and test the length
+    __ jcc(Assembler::notZero, L_load_element);
+
+    // Empty array:  Nothing to do.
+    __ xorptr(rax, rax);                  // return 0 on (trivial) success
+    __ jmp(L_done);
+
+    // ======== begin loop ========
+    // (Loop is rotated; its entry is L_load_element.)
+    // Loop control:
+    //   for (count = -count; count != 0; count++)
+    // Base pointers src, dst are biased by 8*(count-1), to last element.
+    __ align(16);
+
+    __ BIND(L_store_element);
+    __ store_heap_oop(to_element_addr, rax_oop);  // store the oop
+    __ increment(count);               // increment the count toward zero
+    __ jcc(Assembler::zero, L_do_card_marks);
+
+    // ======== loop entry is here ========
+    __ BIND(L_load_element);
+    __ load_heap_oop(rax_oop, from_element_addr); // load the oop
+    __ testptr(rax_oop, rax_oop);
+    __ jcc(Assembler::zero, L_store_element);
+
+    __ load_klass(r11_klass, rax_oop);// query the object klass
+    generate_type_check(r11_klass, ckoff, ckval, L_store_element);
+    // ======== end loop ========
+
+    // It was a real error; we must depend on the caller to finish the job.
+    // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
+    // Emit GC store barriers for the oops we have copied (r14 + rdx),
+    // and report their number to the caller.
+    assert_different_registers(rax, r14_length, count, to, end_to, rcx);
+    __ lea(end_to, to_element_addr);
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
+    __ movptr(rax, r14_length);           // original oops
+    __ addptr(rax, count);                // K = (original - remaining) oops
+    __ notptr(rax);                       // report (-1^K) to caller
+    __ jmp(L_done);
+
+    // Come here on success only.
+    __ BIND(L_do_card_marks);
+    __ addptr(end_to, -wordSize);         // make an inclusive end pointer
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
+    __ xorptr(rax, rax);                  // return 0 on success
+
+    // Common exit point (success or failure).
+    __ BIND(L_done);
+    __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
+    __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
+    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
+    restore_arg_regs();
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+  //
+  //  Generate 'unsafe' array copy stub
+  //  Though just as safe as the other stubs, it takes an unscaled
+  //  size_t argument instead of an element count.
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - byte count, treated as ssize_t, can be zero
+  //
+  // Examines the alignment of the operands and dispatches
+  // to a long, int, short, or byte copy loop.
+  //
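+  //  For reference only (not generated code): the dispatch decision in C
+  //  terms; 'bits' collects the low bits of both addresses and of the byte
+  //  count:
+  //
+  //    uintptr_t bits = (uintptr_t)from | (uintptr_t)to | size;
+  //    if      ((bits & (BytesPerLong  - 1)) == 0)  long_copy (size >> LogBytesPerLong);
+  //    else if ((bits & (BytesPerInt   - 1)) == 0)  int_copy  (size >> LogBytesPerInt);
+  //    else if ((bits & (BytesPerShort - 1)) == 0)  short_copy(size >> LogBytesPerShort);
+  //    else                                         byte_copy (size);
+  //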
+  address generate_unsafe_copy(const char *name) {
+
+    Label L_long_aligned, L_int_aligned, L_short_aligned;
+
+    // Input registers (before setup_arg_regs)
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register size        = c_rarg2;  // byte count (size_t)
+
+    // Register used as a temp
+    const Register bits        = rax;      // test copy of low bits
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // bump this on entry, not on exit:
+    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
+
+    __ mov(bits, from);
+    __ orptr(bits, to);
+    __ orptr(bits, size);
+
+    __ testb(bits, BytesPerLong-1);
+    __ jccb(Assembler::zero, L_long_aligned);
+
+    __ testb(bits, BytesPerInt-1);
+    __ jccb(Assembler::zero, L_int_aligned);
+
+    __ testb(bits, BytesPerShort-1);
+    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
+
+    __ BIND(L_short_aligned);
+    __ shrptr(size, LogBytesPerShort); // size => short_count
+    __ jump(RuntimeAddress(short_copy_entry));
+
+    __ BIND(L_int_aligned);
+    __ shrptr(size, LogBytesPerInt); // size => int_count
+    __ jump(RuntimeAddress(int_copy_entry));
+
+    __ BIND(L_long_aligned);
+    __ shrptr(size, LogBytesPerLong); // size => qword_count
+    __ jump(RuntimeAddress(long_copy_entry));
+
+    return start;
+  }
+
+  // Perform range checks on the proposed arraycopy.
+  // Kills temp, but nothing else.
+  // Also, clean the sign bits of src_pos and dst_pos.
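+  // For reference only (not generated code): the same checks in C terms; the
+  // 'above' branches below are unsigned comparisons:
+  //
+  //   if ((juint)(src_pos + length) > (juint)arrayOop(src)->length())  goto L_failed;
+  //   if ((juint)(dst_pos + length) > (juint)arrayOop(dst)->length())  goto L_failed;
+  //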
+  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
+                              Register src_pos, // source position (c_rarg1)
+                              Register dst,     // destination array oop (c_rarg2)
+                              Register dst_pos, // destination position (c_rarg3)
+                              Register length,
+                              Register temp,
+                              Label& L_failed) {
+    BLOCK_COMMENT("arraycopy_range_checks:");
+
+    //  if (src_pos + length > arrayOop(src)->length())  FAIL;
+    __ movl(temp, length);
+    __ addl(temp, src_pos);             // src_pos + length
+    __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
+    __ jcc(Assembler::above, L_failed);
+
+    //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
+    __ movl(temp, length);
+    __ addl(temp, dst_pos);             // dst_pos + length
+    __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
+    __ jcc(Assembler::above, L_failed);
+
+    // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
+    // Move with sign extension can be used since they are positive.
+    __ movslq(src_pos, src_pos);
+    __ movslq(dst_pos, dst_pos);
+
+    BLOCK_COMMENT("arraycopy_range_checks done");
+  }
+
+  //
+  //  Generate generic array copy stubs
+  //
+  //  Input:
+  //    c_rarg0    -  src oop
+  //    c_rarg1    -  src_pos (32-bits)
+  //    c_rarg2    -  dst oop
+  //    c_rarg3    -  dst_pos (32-bits)
+  // not Win64
+  //    c_rarg4    -  element count (32-bits)
+  // Win64
+  //    rsp+40     -  element count (32-bits)
+  //
+  //  Output:
+  //    rax ==  0  -  success
+  //    rax == -1^K - failure, where K is partial transfer count
+  //
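+  //  For reference only (not generated code): a rough sketch of the dispatch
+  //  logic, omitting the detailed klass and range checks:
+  //
+  //    if (any argument or range check fails)        return -1;
+  //    if (primitive arrays of the same klass)       jump to byte/short/int/long stub;
+  //    if (objArrays of identical type)              jump to the plain oop copy stub;
+  //    if (objArrays needing per-element checks)     jump to the checkcast copy stub;
+  //    otherwise                                     return -1;
+  //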
+  address generate_generic_copy(const char *name) {
+
+    Label L_failed, L_failed_0, L_objArray;
+    Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
+
+    // Input registers
+    const Register src        = c_rarg0;  // source array oop
+    const Register src_pos    = c_rarg1;  // source position
+    const Register dst        = c_rarg2;  // destination array oop
+    const Register dst_pos    = c_rarg3;  // destination position
+    // elements count is on stack on Win64
+#ifdef _WIN64
+#define C_RARG4 Address(rsp, 6 * wordSize)
+#else
+#define C_RARG4 c_rarg4
+#endif
+
+    { int modulus = CodeEntryAlignment;
+      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
+      int advance = target - (__ offset() % modulus);
+      if (advance < 0)  advance += modulus;
+      if (advance > 0)  __ nop(advance);
+    }
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    // Short-hop target to L_failed.  Makes for denser prologue code.
+    __ BIND(L_failed_0);
+    __ jmp(L_failed);
+    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
+
+    __ align(CodeEntryAlignment);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // bump this on entry, not on exit:
+    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
+
+    //-----------------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the following conditions are met:
+    //
+    // (1) src and dst must not be null.
+    // (2) src_pos must not be negative.
+    // (3) dst_pos must not be negative.
+    // (4) length  must not be negative.
+    // (5) src klass and dst klass should be the same and not NULL.
+    // (6) src and dst should be arrays.
+    // (7) src_pos + length must not exceed length of src.
+    // (8) dst_pos + length must not exceed length of dst.
+    //
+
+    //  if (src == NULL) return -1;
+    __ testptr(src, src);         // src oop
+    size_t j1off = __ offset();
+    __ jccb(Assembler::zero, L_failed_0);
+
+    //  if (src_pos < 0) return -1;
+    __ testl(src_pos, src_pos); // src_pos (32-bits)
+    __ jccb(Assembler::negative, L_failed_0);
+
+    //  if (dst == NULL) return -1;
+    __ testptr(dst, dst);         // dst oop
+    __ jccb(Assembler::zero, L_failed_0);
+
+    //  if (dst_pos < 0) return -1;
+    __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
+    size_t j4off = __ offset();
+    __ jccb(Assembler::negative, L_failed_0);
+
+    // The first four tests are very dense code,
+    // but not quite dense enough to put four
+    // jumps in a 16-byte instruction fetch buffer.
+    // That's good, because some branch predictors
+    // do not like jumps so close together.
+    // Make sure of this.
+    guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
+
+    // registers used as temp
+    const Register r11_length    = r11; // elements count to copy
+    const Register r10_src_klass = r10; // array klass
+    const Register r9_dst_klass  = r9;  // dest array klass
+
+    //  if (length < 0) return -1;
+    __ movl(r11_length, C_RARG4);       // length (elements count, 32-bits value)
+    __ testl(r11_length, r11_length);
+    __ jccb(Assembler::negative, L_failed_0);
+
+    __ load_klass(r10_src_klass, src);
+#ifdef ASSERT
+    //  assert(src->klass() != NULL);
+    BLOCK_COMMENT("assert klasses not null");
+    { Label L1, L2;
+      __ testptr(r10_src_klass, r10_src_klass);
+      __ jcc(Assembler::notZero, L2);   // it is broken if klass is NULL
+      __ bind(L1);
+      __ stop("broken null klass");
+      __ bind(L2);
+      __ load_klass(r9_dst_klass, dst);
+      __ cmpq(r9_dst_klass, 0);
+      __ jcc(Assembler::equal, L1);     // this would be broken also
+      BLOCK_COMMENT("assert done");
+    }
+#endif
+
+    // Load layout helper (32-bits)
+    //
+    //  |array_tag|     | header_size | element_type |     |log2_element_size|
+    // 32        30    24            16              8     2                 0
+    //
+    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
+    //
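+    // For reference only (not generated code): how the fields are decoded
+    // below using the Klass layout-helper constants:
+    //
+    //   int header_size_in_bytes = (lh >> Klass::_lh_header_size_shift)
+    //                              & Klass::_lh_header_size_mask;
+    //   int log2_element_size    = lh & Klass::_lh_log2_element_size_mask;
+    //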
+
+    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
+                    Klass::layout_helper_offset_in_bytes();
+
+    const Register rax_lh = rax;  // layout helper
+
+    __ movl(rax_lh, Address(r10_src_klass, lh_offset));
+
+    // Handle objArrays completely differently...
+    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+    __ cmpl(rax_lh, objArray_lh);
+    __ jcc(Assembler::equal, L_objArray);
+
+    //  if (src->klass() != dst->klass()) return -1;
+    __ load_klass(r9_dst_klass, dst);
+    __ cmpq(r10_src_klass, r9_dst_klass);
+    __ jcc(Assembler::notEqual, L_failed);
+
+    //  if (!src->is_Array()) return -1;
+    __ cmpl(rax_lh, Klass::_lh_neutral_value);
+    __ jcc(Assembler::greaterEqual, L_failed);
+
+    // At this point, it is known to be a typeArray (array_tag 0x3).
+#ifdef ASSERT
+    { Label L;
+      __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
+      __ jcc(Assembler::greaterEqual, L);
+      __ stop("must be a primitive array");
+      __ bind(L);
+    }
+#endif
+
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
+                           r10, L_failed);
+
+    // typeArrayKlass
+    //
+    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
+    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
+    //
+
+    const Register r10_offset = r10;    // array offset
+    const Register rax_elsize = rax_lh; // element size
+
+    __ movl(r10_offset, rax_lh);
+    __ shrl(r10_offset, Klass::_lh_header_size_shift);
+    __ andptr(r10_offset, Klass::_lh_header_size_mask);   // array_offset
+    __ addptr(src, r10_offset);           // src array offset
+    __ addptr(dst, r10_offset);           // dst array offset
+    BLOCK_COMMENT("choose copy loop based on element size");
+    __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
+
+    // next registers should be set before the jump to corresponding stub
+    const Register from     = c_rarg0;  // source array address
+    const Register to       = c_rarg1;  // destination array address
+    const Register count    = c_rarg2;  // elements count
+
+    // 'from', 'to', 'count' registers should be set in such order
+    // since they are the same as 'src', 'src_pos', 'dst'.
+
+  __ BIND(L_copy_bytes);
+    __ cmpl(rax_elsize, 0);
+    __ jccb(Assembler::notEqual, L_copy_shorts);
+    __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
+    __ lea(to,   Address(dst, dst_pos, Address::times_1, 0));// dst_addr
+    __ movl2ptr(count, r11_length); // length
+    __ jump(RuntimeAddress(byte_copy_entry));
+
+  __ BIND(L_copy_shorts);
+    __ cmpl(rax_elsize, LogBytesPerShort);
+    __ jccb(Assembler::notEqual, L_copy_ints);
+    __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
+    __ lea(to,   Address(dst, dst_pos, Address::times_2, 0));// dst_addr
+    __ movl2ptr(count, r11_length); // length
+    __ jump(RuntimeAddress(short_copy_entry));
+
+  __ BIND(L_copy_ints);
+    __ cmpl(rax_elsize, LogBytesPerInt);
+    __ jccb(Assembler::notEqual, L_copy_longs);
+    __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
+    __ lea(to,   Address(dst, dst_pos, Address::times_4, 0));// dst_addr
+    __ movl2ptr(count, r11_length); // length
+    __ jump(RuntimeAddress(int_copy_entry));
+
+  __ BIND(L_copy_longs);
+#ifdef ASSERT
+    { Label L;
+      __ cmpl(rax_elsize, LogBytesPerLong);
+      __ jcc(Assembler::equal, L);
+      __ stop("must be long copy, but elsize is wrong");
+      __ bind(L);
+    }
+#endif
+    __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
+    __ lea(to,   Address(dst, dst_pos, Address::times_8, 0));// dst_addr
+    __ movl2ptr(count, r11_length); // length
+    __ jump(RuntimeAddress(long_copy_entry));
+
+    // objArrayKlass
+  __ BIND(L_objArray);
+    // live at this point:  r10_src_klass, src[_pos], dst[_pos]
+
+    Label L_plain_copy, L_checkcast_copy;
+    //  test array classes for subtyping
+    __ load_klass(r9_dst_klass, dst);
+    __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
+    __ jcc(Assembler::notEqual, L_checkcast_copy);
+
+    // Identically typed arrays can be copied without element-wise checks.
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
+                           r10, L_failed);
+
+    __ lea(from, Address(src, src_pos, TIMES_OOP,
+                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
+    __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
+                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
+    __ movl2ptr(count, r11_length); // length
+  __ BIND(L_plain_copy);
+    __ jump(RuntimeAddress(oop_copy_entry));
+
+  __ BIND(L_checkcast_copy);
+    // live at this point:  r10_src_klass, !r11_length
+    {
+      // assert(r11_length == C_RARG4); // will reload from here
+      Register r11_dst_klass = r11;
+      __ load_klass(r11_dst_klass, dst);
+
+      // Before looking at dst.length, make sure dst is also an objArray.
+      __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
+      __ jcc(Assembler::notEqual, L_failed);
+
+      // It is safe to examine both src.length and dst.length.
+#ifndef _WIN64
+      arraycopy_range_checks(src, src_pos, dst, dst_pos, C_RARG4,
+                             rax, L_failed);
+#else
+      __ movl(r11_length, C_RARG4);     // reload
+      arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
+                             rax, L_failed);
+      __ load_klass(r11_dst_klass, dst); // reload
+#endif
+
+      // Marshal the base address arguments now, freeing registers.
+      __ lea(from, Address(src, src_pos, TIMES_OOP,
+                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+      __ lea(to,   Address(dst, dst_pos, TIMES_OOP,
+                   arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+      __ movl(count, C_RARG4);          // length (reloaded)
+      Register sco_temp = c_rarg3;      // this register is free now
+      assert_different_registers(from, to, count, sco_temp,
+                                 r11_dst_klass, r10_src_klass);
+      assert_clean_int(count, sco_temp);
+
+      // Generate the type check.
+      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                        Klass::super_check_offset_offset_in_bytes());
+      __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
+      assert_clean_int(sco_temp, rax);
+      generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
+
+      // Fetch destination element klass from the objArrayKlass header.
+      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
+                       objArrayKlass::element_klass_offset_in_bytes());
+      __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
+      __ movl(sco_temp,      Address(r11_dst_klass, sco_offset));
+      assert_clean_int(sco_temp, rax);
+
+      // the checkcast_copy loop needs two extra arguments:
+      assert(c_rarg3 == sco_temp, "#3 already in place");
+      __ movptr(C_RARG4, r11_dst_klass);  // dst.klass.element_klass
+      __ jump(RuntimeAddress(checkcast_copy_entry));
+    }
+
+  __ BIND(L_failed);
+    __ xorptr(rax, rax);
+    __ notptr(rax); // return -1
+    __ leave();   // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+#undef length_arg
+#endif
+
+//FIXME
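+  // Inputs (same convention as the stubs above):
+  //   A0 - source array address
+  //   A1 - destination array address
+  //   A2 - element count, treated as ssize_t, can be zero
+  //
+  // For reference only (not generated code): a minimal C sketch of the
+  // intended semantics, assuming the JNI jlong typedef and non-overlapping
+  // arrays:
+  //
+  //   static void disjoint_long_copy(jlong* from, jlong* to, ssize_t count) {
+  //     for (ssize_t i = 0; i < count; i++)   // 8 bytes per element (ld/sd)
+  //       to[i] = from[i];
+  //   }
+  //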
+  address generate_disjoint_long_copy(bool aligned, const char *name) {
+	  Label l_1, l_2;
+	  StubCodeMark mark(this, "StubRoutines", name);
+	  __ align(CodeEntryAlignment);
+	  address start = __ pc();
+
+	  //      __ movl(ecx, Address(esp, 4+8));       // count
+	  //     __ movl(eax, Address(esp, 4+0));       // from
+	  //    __ movl(edx, Address(esp, 4+4));       // to
+	  __ move(T5, A2);  
+	  __ move(T3, A0); 
+	  __ move(T4, A1);
+	  __ push(T3); 
+	  __ push(T4);
+	  __ push(T5);
+	  //__ subl(edx, eax);
+	  //__ jmp(l_2);
+	  __ b(l_2);  
+	  __ delayed()->nop();   
+	  __ align(16);
+	  __ bind(l_1);
+	  //   if (VM_Version::supports_mmx()) {
+	  //     __ movq(mmx0, Address(eax));
+	  //     __ movq(Address(eax, edx, Address::times_1), mmx0);
+	  //   } else {
+	  //   __ fild_d(Address(eax));
+	  __ ld(AT, T3, 0);   
+	  // __ fistp_d(Address(eax, edx, Address::times_1));
+	  __ sd(AT, T4, 0);
+	  //   }
+	  //   __ addl(eax, 8);
+	  __ addi(T3, T3, 8); 
+	  __ addi(T4, T4, 8); 
+	  __ bind(l_2);
+	  //    __ decl(ecx);
+	  __ addi(T5, T5, -1); 
+	  //    __ jcc(Assembler::greaterEqual, l_1);
+	  __ bgez(T5, l_1);    
+	  __ delayed()->nop(); 
+	  //  if (VM_Version::supports_mmx()) {
+	  //    __ emms();
+	  //  }
+	  //  __ ret(0);
+	  __ pop(T5); 
+	  __ pop(T4); 
+	  __ pop(T3); 
+	  __ jr(RA); 
+	  __ delayed()->nop(); 
+	  return start;
+  }
+
+
+  address generate_conjoint_long_copy(bool aligned, const char *name) {
+	  Label l_1, l_2;
+	  StubCodeMark mark(this, "StubRoutines", name);
+	  __ align(CodeEntryAlignment);
+	  address start = __ pc();
+	  address nooverlap_target = aligned ?
+		  StubRoutines::arrayof_jlong_disjoint_arraycopy() :
+		  StubRoutines::jlong_disjoint_arraycopy();
+		__ push(T3); 
+	  __ push(T4); 
+	  __ push(T5); 
+
+	  array_overlap_test(nooverlap_target, 3);
+
+		/*      __ movl(ecx, Address(esp, 4+8));       // count
+						__ movl(eax, Address(esp, 4+0));       // from
+						__ movl(edx, Address(esp, 4+4));       // to
+						__ jmp(l_2);
+
+		 */
+	  __ move(T5, A2);  
+	  __ move(T3, A0); 
+	  __ move(T4, A1);
+	  __ sll(AT, T5, Address::times_8); 
+	  __ add(AT, T3, AT); 
+	  __ lea(T3 , Address(AT, -8)); 
+	  __ sll(AT, T5, Address::times_8); 
+	  __ add(AT, T4, AT); 
+	  __ lea(T4 , Address(AT, -8)); 
+
+
+
+	  __ b(l_2); 
+	  __ delayed()->nop(); 
+	  __ align(16);
+		__ bind(l_1);
+		/*      if (VM_Version::supports_mmx()) {
+						__ movq(mmx0, Address(eax, ecx, Address::times_8));
+						__ movq(Address(edx, ecx,Address::times_8), mmx0);
+						} else {
+						__ fild_d(Address(eax, ecx, Address::times_8));
+						__ fistp_d(Address(edx, ecx,Address::times_8));
+						}
+		 */    
+	  __ ld(AT, T3, 0);
+	  __ sd(AT, T4, 0);
+	  __ addi(T3, T3, -8);
+	  __ addi(T4, T4, -8);
+	  __ bind(l_2);
+	  //	    __ decl(ecx);
+	  __ addi(T5, T5, -1); 
+	  //__ jcc(Assembler::greaterEqual, l_1);
+	  __ bgez(T5, l_1); 
+	  __ delayed()->nop(); 
+	  //      if (VM_Version::supports_mmx()) {
+	  //      __ emms();
+	  //   }
+	  //  __ ret(0);
+	  __ pop(T5); 
+	  __ pop(T4); 
+	  __ pop(T3); 
+	  __ jr(RA); 
+	  __ delayed()->nop();  
+	  return start;
+  }
+
+  void generate_arraycopy_stubs() {
+/*
+    // Call the conjoint generation methods immediately after
+    // the disjoint ones so that short branches from the former
+    // to the latter can be generated.
+    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
+    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
+
+    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
+    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, "jshort_arraycopy");
+
+    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
+    StubRoutines::_jint_arraycopy            = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
+
+    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy");
+    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy");
+
+
+    if (UseCompressedOops) {
+      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
+      StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
+    } else {
+      StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
+      StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
+    }
+
+    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
+    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
+    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
+
+    // We don't generate specialized code for HeapWord-aligned source
+    // arrays, so just use the code we've already generated
+    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = StubRoutines::_jbyte_disjoint_arraycopy;
+    StubRoutines::_arrayof_jbyte_arraycopy           = StubRoutines::_jbyte_arraycopy;
+
+    StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
+    StubRoutines::_arrayof_jshort_arraycopy          = StubRoutines::_jshort_arraycopy;
+
+    StubRoutines::_arrayof_jint_disjoint_arraycopy   = StubRoutines::_jint_disjoint_arraycopy;
+    StubRoutines::_arrayof_jint_arraycopy            = StubRoutines::_jint_arraycopy;
+
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = StubRoutines::_jlong_disjoint_arraycopy;
+    StubRoutines::_arrayof_jlong_arraycopy           = StubRoutines::_jlong_arraycopy;
+
+    StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
+		StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
+ */
+		StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
+		StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
+		StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
+		StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
+		StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
+		StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
+
+		//  if (VM_Version::supports_mmx())
+		//if (false)
+		// StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_mmx_copy_aligned("arrayof_jshort_disjoint_arraycopy");
+		// else
+		StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
+		StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_oop_copy(true, false, "arrayof_jint_disjoint_arraycopy");
+		StubRoutines::_arrayof_oop_disjoint_arraycopy   = generate_disjoint_int_oop_copy(true, true, "arrayof_oop_disjoint_arraycopy");
+		StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
+
+		StubRoutines::_jbyte_arraycopy  = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
+		StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
+		StubRoutines::_jint_arraycopy   = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
+		StubRoutines::_oop_arraycopy   	= generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
+		StubRoutines::_jlong_arraycopy  = generate_conjoint_long_copy(false, "jlong_arraycopy");
+
+		StubRoutines::_arrayof_jbyte_arraycopy  = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
+		StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
+		StubRoutines::_arrayof_jint_arraycopy   = generate_conjoint_int_oop_copy(true, false, "arrayof_jint_arraycopy");
+		StubRoutines::_arrayof_oop_arraycopy    = generate_conjoint_int_oop_copy(true, true, "arrayof_oop_arraycopy");
+		StubRoutines::_arrayof_jlong_arraycopy  = generate_conjoint_long_copy(true, "arrayof_jlong_arraycopy");
+	}
+
+#undef __
+#define __ masm->
+
+  // Continuation point for throwing of implicit exceptions that are
+  // not handled in the current activation. Fabricates an exception
+  // oop and initiates normal exception dispatching in this
+  // frame. Since we need to preserve callee-saved values (currently
+  // only for C2, but done for C1 as well) we need a callee-saved oop
+  // map and therefore have to make these stubs into RuntimeStubs
+  // rather than BufferBlobs.  If the compiler needs all registers to
+  // be preserved between the fault point and the exception handler
+  // then it must assume responsibility for that in
+  // AbstractCompiler::continuation_for_implicit_null_exception or
+  // continuation_for_implicit_division_by_zero_exception. All other
+  // implicit exceptions (e.g., NullPointerException or
+  // AbstractMethodError on entry) are either at call sites or
+  // otherwise assume that stack unwinding will be initiated, so
+  // caller saved registers were assumed volatile in the compiler.
+  address generate_throw_exception(const char* name,
+                                   address runtime_entry,
+                                   bool restore_saved_exception_pc) {
+    // Information about frame layout at time of blocking runtime call.
+    // Note that we only have to preserve callee-saved registers since
+    // the compilers are responsible for supplying a continuation point
+		// if they expect all registers to be preserved.
+		enum layout {
+			thread_off,    // last_java_sp                
+			S7_off,        // callee saved register      sp + 1
+			S6_off,        // callee saved register      sp + 2
+			S5_off,        // callee saved register      sp + 3
+			S4_off,        // callee saved register      sp + 4
+			S3_off,        // callee saved register      sp + 5
+			S2_off,        // callee saved register      sp + 6
+			S1_off,        // callee saved register      sp + 7
+			S0_off,        // callee saved register      sp + 8
+			FP_off,
+			ret_address,
+			framesize
+		};
+
+		int insts_size = 2048;
+		int locs_size  = 32;
+
+		//  CodeBuffer* code     = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, 
+		//  NULL, NULL, NULL, false, NULL, name, false);
+		CodeBuffer code (name , insts_size, locs_size);
+		OopMapSet* oop_maps  = new OopMapSet();
+		MacroAssembler* masm = new MacroAssembler(&code);
+
+		address start = __ pc();
+		/*
+			 __ move(AT, (int)&jerome1 );
+			 __ sw(SP, AT, 0); 	
+			 __ move(AT, (int)&jerome2 );
+			 __ sw(FP, AT, 0); 	
+			 __ move(AT, (int)&jerome3 );
+			 __ sw(RA, AT, 0); 	
+			 __ move(AT, (int)&jerome4 );
+			 __ sw(ZERO, AT, 0); 	
+			 __ move(AT, (int)&jerome5 );
+			 __ sw(ZERO, AT, 0); 	
+			 __ move(AT, (int)&jerome6 );
+			 __ sw(ZERO, AT, 0); 	
+			 __ move(AT, (int)&jerome7 );
+			 __ sw(ZERO, AT, 0); 	
+			 __ move(AT, (int)&jerome10 );
+			 __ sw(ZERO, AT, 0); 	
+
+			 __ pushad();
+
+		//__ enter();
+		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+		relocInfo::runtime_call_type);
+		__ delayed()->nop();
+
+		//__ leave();
+		__ popad();
+
+		 */
+
+		// This is an inlined and slightly modified version of call_VM
+		// which has the ability to fetch the return PC out of
+		// thread-local storage and also sets up last_Java_sp slightly
+		// differently than the real call_VM
+#ifndef OPT_THREAD	
+		Register java_thread = A0;
+		__ get_thread(java_thread);
+#else
+		Register java_thread = TREG;
+#endif
+		if (restore_saved_exception_pc) {
+			__ lw(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); // restore the saved exception pc into RA
+		}
+
+		__ enter(); // required for proper stackwalking of RuntimeStub frame
+
+		__ addi(SP, SP, (-1) * (framesize-2) * wordSize); // prolog
+		__ sw(S0, SP, S0_off * wordSize);
+		__ sw(S1, SP, S1_off * wordSize);
+		__ sw(S2, SP, S2_off * wordSize);
+		__ sw(S3, SP, S3_off * wordSize);
+		__ sw(S4, SP, S4_off * wordSize);
+		__ sw(S5, SP, S5_off * wordSize);
+		__ sw(S6, SP, S6_off * wordSize);
+		__ sw(S7, SP, S7_off * wordSize);
+
+		int frame_complete = __ pc() - start;
+		// push java thread (becomes first argument of C function)
+		__ sw(java_thread, SP, thread_off * wordSize);
+		if (java_thread!=A0)
+			__ move(A0, java_thread);
+
+		// Set up last_Java_sp and last_Java_fp
+		__ set_last_Java_frame(java_thread, SP, FP, NULL);
+		__ relocate(relocInfo::internal_pc_type);
+		{
+			int save_pc = (int)__ pc() +  12 + NativeCall::return_address_offset;
+			__ lui(AT, Assembler::split_high(save_pc));
+			__ addiu(AT, AT, Assembler::split_low(save_pc));
+		}
+		__ sw(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); 
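+		// Editor's note (hedged, not part of the original change): the "+ 12" above is
+		// presumably the size of the three instructions emitted between this point and
+		// the call sequence (lui + addiu + sw, 4 bytes each), i.e.
+		//
+		//   save_pc = pc_at_lui + 3 * 4 + NativeCall::return_address_offset
+		//
+		// where NativeCall::return_address_offset is assumed to span the
+		// lui/addiu/jalr/delay-slot call sequence below, so that last_Java_pc ends up
+		// pointing at the jalr's return address.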
+
+		// Call runtime
+		__ lui(T9, Assembler::split_high((int)runtime_entry));
+		__ addiu(T9, T9, Assembler::split_low((int)runtime_entry));
+		__ jalr(T9);
+		__ delayed()->nop();
+		// Generate oop map
+		OopMap* map =  new OopMap(framesize, 0);        
+		oop_maps->add_gc_map(__ offset(),  map);
+
+		// restore the thread (cannot use the pushed argument since arguments
+		// may be overwritten by C code generated by an optimizing compiler);
+		// however can use the register value directly if it is callee saved.
+#ifndef OPT_THREAD
+		__ get_thread(java_thread);
+#endif
+
+		__ lw(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
+		//  __ reset_last_Java_frame(java_thread, true);
+		__ reset_last_Java_frame(java_thread, true, true);
+
+		// Restore callee save registers.  This must be done after resetting the Java frame
+		__ lw(S0, SP, S0_off * wordSize);
+		__ lw(S1, SP, S1_off * wordSize);
+		__ lw(S2, SP, S2_off * wordSize);
+		__ lw(S3, SP, S3_off * wordSize);
+		__ lw(S4, SP, S4_off * wordSize);
+		__ lw(S5, SP, S5_off * wordSize);
+		__ lw(S6, SP, S6_off * wordSize);
+		__ lw(S7, SP, S7_off * wordSize);
+
+		// discard arguments
+		__ addi(SP, SP, (framesize-2) * wordSize); // epilog
+		//	__ leave(); // required for proper stackwalking of RuntimeStub frame
+		__ addi(SP, FP, wordSize);
+		__ lw(FP, SP, -1*wordSize);
+		// check for pending exceptions
+#ifdef ASSERT
+		Label L;
+		__ lw(AT, java_thread, in_bytes(Thread::pending_exception_offset()));
+		__ bne(AT, ZERO, L);
+		__ delayed()->nop();
+		__ should_not_reach_here();
+		__ bind(L);
+#endif //ASSERT
+		__ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
+		__ delayed()->nop();
+		RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code,frame_complete, 
+										framesize, oop_maps, false);
+		return stub->entry_point();
+  }
+
+  // Initialization
+  void generate_initial() {
+/*
+		// Generates all stubs and initializes the entry points
+
+    // This platform-specific stub is needed by generate_call_stub()
+    StubRoutines::x86::_mxcsr_std        = generate_fp_mask("mxcsr_std",        0x0000000000001F80);
+
+    // entry points that exist in all platforms Note: This is code
+    // that could be shared among different platforms - however the
+    // benefit seems to be smaller than the disadvantage of having a
+    // much more complicated generator structure. See also comment in
+    // stubRoutines.hpp.
+
+    StubRoutines::_forward_exception_entry = generate_forward_exception();
+
+    StubRoutines::_call_stub_entry =
+      generate_call_stub(StubRoutines::_call_stub_return_address);
+
+    // is referenced by megamorphic call
+    StubRoutines::_catch_exception_entry = generate_catch_exception();
+
+    // atomic calls
+    StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
+    StubRoutines::_atomic_xchg_ptr_entry     = generate_atomic_xchg_ptr();
+    StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
+    StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
+    StubRoutines::_atomic_add_entry          = generate_atomic_add();
+    StubRoutines::_atomic_add_ptr_entry      = generate_atomic_add_ptr();
+    StubRoutines::_fence_entry               = generate_orderaccess_fence();
+
+    StubRoutines::_handler_for_unsafe_access_entry =
+      generate_handler_for_unsafe_access();
+
+    // platform dependent
+    StubRoutines::x86::_get_previous_fp_entry = generate_get_previous_fp();
+
+    StubRoutines::x86::_verify_mxcsr_entry    = generate_verify_mxcsr();
+*/
+		// Generates all stubs and initializes the entry points
+
+		//-------------------------------------------------------------
+		//-----------------------------------------------------------
+		// entry points that exist in all platforms
+		// Note: This is code that could be shared among different platforms - however the benefit seems to be smaller 
+		// than the disadvantage of having a much more complicated generator structure. 
+		// See also comment in stubRoutines.hpp.
+		StubRoutines::_forward_exception_entry = generate_forward_exception();    
+		StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
+		// is referenced by megamorphic call    
+		StubRoutines::_catch_exception_entry = generate_catch_exception();    
+
+		StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access();
+
+		// platform dependent
+		StubRoutines::gs2::_get_previous_fp_entry = generate_get_previous_fp();
+	}
+
+void generate_all() {
+    // Generates all stubs and initializes the entry points
+
+    // These entry points require SharedInfo::stack0 to be set up in
+    // non-core builds and need to be relocatable, so they each
+    // fabricate a RuntimeStub internally.
+	/*
+    StubRoutines::_throw_AbstractMethodError_entry =
+      generate_throw_exception("AbstractMethodError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_AbstractMethodError),
+                               false);
+
+    StubRoutines::_throw_IncompatibleClassChangeError_entry =
+      generate_throw_exception("IncompatibleClassChangeError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_IncompatibleClassChangeError),
+                               false);
+
+    StubRoutines::_throw_ArithmeticException_entry =
+      generate_throw_exception("ArithmeticException throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_ArithmeticException),
+                               true);
+
+    StubRoutines::_throw_NullPointerException_entry =
+      generate_throw_exception("NullPointerException throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_NullPointerException),
+                               true);
+
+    StubRoutines::_throw_NullPointerException_at_call_entry =
+      generate_throw_exception("NullPointerException at call throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_NullPointerException_at_call),
+                               false);
+
+    StubRoutines::_throw_StackOverflowError_entry =
+      generate_throw_exception("StackOverflowError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_StackOverflowError),
+                               false);
+
+    // entry points that are platform specific
+    StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
+    StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
+    StubRoutines::x86::_d2i_fixup = generate_d2i_fixup();
+    StubRoutines::x86::_d2l_fixup = generate_d2l_fixup();
+
+    StubRoutines::x86::_float_sign_mask  = generate_fp_mask("float_sign_mask",  0x7FFFFFFF7FFFFFFF);
+    StubRoutines::x86::_float_sign_flip  = generate_fp_mask("float_sign_flip",  0x8000000080000000);
+    StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
+    StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
+
+    // support for verify_oop (must happen after universe_init)
+    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
+
+    // arraycopy stubs used by compilers
+    generate_arraycopy_stubs();
+	*/
+		StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
+		StubRoutines::_throw_ArithmeticException_entry         = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),  true);
+		StubRoutines::_throw_NullPointerException_entry        = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
+		StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
+		StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),   false);
+
+		//------------------------------------------------------
+		//------------------------------------------------------------------
+		// entry points that are platform specific  
+
+		// support for verify_oop (must happen after universe_init)
+		StubRoutines::_verify_oop_subroutine_entry	   = generate_verify_oop();
+#ifndef CORE
+		// arraycopy stubs used by compilers
+		generate_arraycopy_stubs();
+#endif
+
+	}
+
+ public:
+  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
+    if (all) {
+      generate_all();
+    } else {
+      generate_initial();
+    }
+  }
+}; // end class declaration
+/*
+address StubGenerator::disjoint_byte_copy_entry  = NULL;
+address StubGenerator::disjoint_short_copy_entry = NULL;
+address StubGenerator::disjoint_int_copy_entry   = NULL;
+address StubGenerator::disjoint_long_copy_entry  = NULL;
+address StubGenerator::disjoint_oop_copy_entry   = NULL;
+
+address StubGenerator::byte_copy_entry  = NULL;
+address StubGenerator::short_copy_entry = NULL;
+address StubGenerator::int_copy_entry   = NULL;
+address StubGenerator::long_copy_entry  = NULL;
+address StubGenerator::oop_copy_entry   = NULL;
+
+address StubGenerator::checkcast_copy_entry = NULL;
+*/
+void StubGenerator_generate(CodeBuffer* code, bool all) {
+  StubGenerator g(code, all);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_stubRoutines_mips.cpp.incl"
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+//find the last fp value
+address StubRoutines::gs2::_get_previous_fp_entry                           = NULL;
+address StubRoutines::gs2::_call_stub_compiled_return                        = NULL;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to
+// extend it.
+
+//static bool    returns_to_call_stub(address return_pc)   { return return_pc == _call_stub_return_address; }
+  static bool    returns_to_call_stub(address return_pc) {
+    return return_pc == _call_stub_return_address || return_pc == gs2::get_call_stub_compiled_return();
+  }
+
+enum platform_dependent_constants
+{
+  //code_size1 =  19000, // simply increase if too small (assembler will
+                      // crash if too small)
+  //code_size2 = 22000  // simply increase if too small (assembler will
+                      // crash if too small)
+  code_size1 = 20000,		// simply increase if too small (assembler will crash if too small)
+  code_size2 = 40000		// simply increase if too small (assembler will crash if too small)
+};
+//aoqi:FIXME class name gs2?
+class gs2 {
+	friend class StubGenerator;
+ 	friend class VMStructs;
+private:
+	// If we call compiled code directly from the call stub we will
+  // need to adjust the return back to the call stub to a specialized
+  // piece of code that can handle compiled results and cleaning the fpu
+  // stack. The variable holds that location.
+	static address _call_stub_compiled_return;  
+	static address _get_previous_fp_entry;
+
+public:
+  // Call back points for traps in compiled code
+	static address get_previous_fp_entry()     { return _get_previous_fp_entry; }
+  static address get_call_stub_compiled_return()    { return _call_stub_compiled_return; }
+  static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; }
+
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,29 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+ protected:
+
+ void generate_fixed_frame(bool native_call);
+
+ // address generate_asm_interpreter_entry(bool synchronized);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,2325 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_interpreter_mips.cpp.incl"
+
+#define __ _masm->
+
+#ifndef CC_INTERP
+
+const int Interpreter::return_sentinel = 0xfeedbeed;
+const int method_offset = frame::interpreter_frame_method_offset * wordSize;
+const int bci_offset    = frame::interpreter_frame_bcx_offset    * wordSize;
+const int locals_offset = frame::interpreter_frame_locals_offset * wordSize;
+
+//-----------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
+  address entry = __ pc();
+
+#ifdef ASSERT
+  {
+    Label L;
+		__ addi(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+		__ sub(T1, T1, SP); // T1 = maximal sp for current fp
+		__ bgez(T1, L);     // check if frame is complete
+		__ delayed()->nop();
+		__ stop("interpreter frame not set up");
+		__ bind(L);
+  }
+#endif // ASSERT
+  // Restore bcp under the assumption that the current frame is still
+  // interpreted
+	// FIXME: please change the restore_bcp function
+	// S0 is the conventional register for bcp
+  __ restore_bcp();
+
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // throw exception
+  //__ call_VM(noreg,
+  //           CAST_FROM_FN_PTR(address,
+  //                            InterpreterRuntime::throw_StackOverflowError));
+	// FIXME: why is the thread parameter not passed here?
+	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(
+        const char* name) {
+  address entry = __ pc();
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // setup parameters
+  // convention: the aberrant index is expected in a register (ebx on x86; A2 in this port)
+  //__ lea(c_rarg1, ExternalAddress((address)name));
+  //__ call_VM(noreg,
+  //           CAST_FROM_FN_PTR(address,
+  //                            InterpreterRuntime::
+  //                            throw_ArrayIndexOutOfBoundsException),
+  //           c_rarg1, rbx);
+	__ move(A1, (int)name);
+	__ call_VM(noreg, CAST_FROM_FN_PTR(address, 
+			InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2);
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
+  address entry = __ pc();
+
+  // object is at TOS
+// FIXME: I am not sure whether the object is at TOS as it is on x86. @jerome, 04/20/2007
+  //__ pop(c_rarg1);
+
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  __ empty_FPU_stack();
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException),  FSR);
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_exception_handler_common(
+        const char* name, const char* message, bool pass_oop) {
+	assert(!pass_oop || message == NULL, "either oop or message but not both");
+	address entry = __ pc();
+
+	// expression stack must be empty before entering the VM if an exception happened
+	__ empty_expression_stack();
+	// setup parameters
+	__ move(A1, (int)name);
+	if (pass_oop) {
+		__ call_VM(V0, 
+		CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR);
+	} else {
+		__ move(A2, (int)message);
+		__ call_VM(V0, 
+		CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2);
+	}
+	// throw exception
+	__ jmp(Interpreter::throw_exception_entry(), relocInfo::none);
+	__ delayed()->nop();
+	return entry;
+}
+
+
+address TemplateInterpreterGenerator::generate_continuation_for(TosState state) {
+  address entry = __ pc();
+  // NULL last_sp until next java call
+  //__ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  //__ dispatch_next(state);
+	__ sw(ZERO,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); 
+	__ dispatch_next(state);
+  return entry;
+}
+
+
+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
+                                                                int step) {
+	Label interpreter_entry;
+	address compiled_entry = __ pc();
+
+#ifdef COMPILER2
+	// The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
+	if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) {
+		for (int i = 1; i < 8; i++) {
+			__ ffree(i);
+		}
+	} else if (UseSSE < 2) {
+		__ empty_FPU_stack();
+	}
+#endif
+	if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) {
+		__ MacroAssembler::verify_FPU(1, "generate_return_entry_for compiled");
+	} else {
+		__ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled");
+	}
+
+	// __ jmp(interpreter_entry, relocInfo::none);
+	__ b(interpreter_entry);
+	__ delayed()->nop(); 
+	// emit a sentinel we can test for when converting an interpreter
+	// entry point to a compiled entry point.
+	__ a_long(Interpreter::return_sentinel);
+	__ a_long((int)compiled_entry);
+
+	address entry = __ pc();
+
+	__ bind(interpreter_entry);
+
+	/*  // In SSE mode, interpreter returns FP results in xmm0 but they need
+	// to end up back on the FPU so it can operate on them.
+	if (state == ftos && UseSSE >= 1) {
+	__ subl(esp, wordSize);
+	__ movss(Address(esp, 0), xmm0);
+	__ fld_s(Address(esp, 0));
+	__ addl(esp, wordSize);
+	} else if (state == dtos && UseSSE >= 2) {
+	__ subl(esp, 2*wordSize);
+	__ movsd(Address(esp, 0), xmm0);
+	__ fld_d(Address(esp, 0));
+	__ addl(esp, 2*wordSize);
+	}
+	*/
+	__ MacroAssembler::verify_FPU(state == ftos || state == dtos ? 1 : 0, "generate_return_entry_for in interpreter");
+
+	// Restore stack bottom in case i2c adjusted stack
+	// __ movl(esp, Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize));
+	__ lw(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize);
+	// and NULL it as marker that esp is now tos until next java call
+	// __ movl(Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
+	__ sw(ZERO,FP, frame::interpreter_frame_last_sp_offset * wordSize);
+
+	__ restore_bcp();
+	__ restore_locals();
+	__ get_cache_and_index_at_bcp(T7, T3, 1);
+// shift left by 4 here because get_cache_and_index_at_bcp does not do the shift
+	//__ sll(T3, T3, Address::times_4);
+	__ sll(T3, T3,4);
+	__ add(T3, T3, T7);
+	__ lw(T7, T3, in_bytes(constantPoolCacheOopDesc::base_offset()
+				+ConstantPoolCacheEntry::flags_offset()));
+	__ andi(T7, T7, 0xFF);
+	__ sll(T7, T7, Interpreter::stackElementScale());
+	__ add(SP, SP, T7);
+/*	
+	Label mmm;
+	__ move(AT,  (int)&jerome4);	
+	__ lw(AT, AT, 0);	
+	__ beq(AT, ZERO, mmm);	
+	__ delayed()->nop();	
+	
+	__ move(AT, (int)&jerome1 );
+	__ sw(SP, AT, 0); 	
+	__ move(AT, (int)&jerome2 );
+	__ sw(FP, AT, 0); 	
+	__ move(AT, (int)&jerome3 );
+	__ sw(BCP, AT, 0); 	
+
+	__ move(AT, (int)&jerome4 );
+	__ sw(RA, AT, 0); 	
+	__ move(AT, (int)&jerome5 );
+	__ sw(T7, AT, 0); 	
+
+	__ move(AT, (int)&jerome6 );
+	__ sw(V0 , AT, 0);
+	__ move(AT, (int)&jerome7 );
+	__ sw(V1 , AT, 0);
+	__ move(AT, (int)&jerome8 );
+	__ sw(ZERO , AT, 0);
+	
+	__ move(AT, (int)&jerome9 );
+	__ move(RA, step);	
+	__ sw(RA , AT, 0);
+	__ move(AT, (int)&jerome10 );
+	__ lbu(RA, BCP, step);	
+	__ sw(RA , AT, 0);
+
+
+	__ move(AT, (int)&jerome5 );
+	__ lw(RA, AT, 0); 	
+
+
+	__ pushad();
+//	__ enter();
+	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				relocInfo::runtime_call_type);
+	__ delayed()->nop();
+//	__ leave();
+	__ popad();
+      
+	__ bind(mmm);
+*/
+
+	
+	__ dispatch_next(state, step);
+  return entry;
+}
+
+
+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state,
+                                                               int step) {
+  address entry = __ pc();
+  // NULL last_sp until next java call
+  //__ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+	__ sw(ZERO, FP, frame::interpreter_frame_last_sp_offset * wordSize);
+  __ restore_bcp();
+  __ restore_locals();
+  // handle exceptions
+  {
+    Label L;
+		const Register thread = TREG;
+#ifndef OPT_THREAD
+		__ get_thread(thread);
+#endif
+		__ lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
+		__ beq(AT, ZERO, L);
+		__ delayed()->nop();
+		__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
+		__ should_not_reach_here();
+		__ bind(L);
+  }
+  __ dispatch_next(state, step);
+  return entry;
+}
+
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int i = 0;
+/*
+  switch (type) {
+    case T_BOOLEAN: i = 0; break;
+    case T_CHAR   : i = 1; break;
+    case T_BYTE   : i = 2; break;
+    case T_SHORT  : i = 3; break;
+    case T_INT    : i = 4; break;
+    case T_LONG   : i = 5; break;
+    case T_VOID   : i = 6; break;
+    case T_FLOAT  : i = 7; break;
+    case T_DOUBLE : i = 8; break;
+    case T_OBJECT : i = 9; break;
+    case T_ARRAY  : i = 9; break;
+    default       : ShouldNotReachHere();
+  }
+*/
+	switch (type) {
+		case T_BOOLEAN: i = 0; break;
+		case T_CHAR   : i = 1; break;
+		case T_BYTE   : i = 2; break;
+		case T_SHORT  : i = 3; break;
+		case T_INT    : // fall through
+		case T_LONG   : // fall through
+		case T_VOID   : i = 4; break;
+		case T_FLOAT  : i = 5; break;
+		case T_DOUBLE : i = 6; break;
+		case T_OBJECT : // fall through
+		case T_ARRAY  : i = 7; break;
+		default       : ShouldNotReachHere();
+	}
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers,
+         "index out of bounds");
+  return i;
+}
+
+
+// why are float and double not considered here? @jerome, 12/27/06
+address TemplateInterpreterGenerator::generate_result_handler_for(
+        BasicType type) {
+	address entry = __ pc();
+	switch (type) {
+		case T_BOOLEAN: __ c2bool(V0);             break;
+		case T_CHAR   : __ andi(V0, V0, 0xFFFF);   break;
+		case T_BYTE   : __ sign_extend_byte (V0);  break;
+		case T_SHORT  : __ sign_extend_short(V0);  break;
+		case T_INT    : /* nothing to do */        break;
+		case T_FLOAT  : /* nothing to do */        break;
+		case T_DOUBLE : /* nothing to do */        break;
+		case T_OBJECT :
+		{
+		//	__ beq(V0, ZERO, L);       // test if NULL handle
+		//	__ delayed()->nop();       // if not then
+		//	__ lw(V0, V0, 0);          // unbox result
+	 		__ lw(V0,FP, frame::interpreter_frame_oop_temp_offset * wordSize);	
+			__ verify_oop(V0);         // and verify it
+		}
+							   break;
+		default       : ShouldNotReachHere();
+	}
+	__ jr(RA);                                  // return from result handler
+	__ delayed()->nop();
+	return entry;
+}
+
+address TemplateInterpreterGenerator::generate_safept_entry_for(
+        TosState state,
+        address runtime_entry) {
+  address entry = __ pc();
+  __ push(state);
+  __ call_VM(noreg, runtime_entry);
+  __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
+  return entry;
+}
+
+
+
+// Helpers for commoning out cases in the various type of method entries.
+//
+
+
+// increment invocation count & check for overflow
+//
+// Note: checking for negative value instead of overflow
+//       so we have a 'sticky' overflow test
+//
+// prerequisites : method in T7, invocation counter in T3
+void InterpreterGenerator::generate_counter_incr(
+        Label* overflow,
+        Label* profile_method,
+        Label* profile_method_continue) {
+
+	const Address invocation_counter(T7, in_bytes(methodOopDesc::invocation_counter_offset()) 
+			+ in_bytes(InvocationCounter::counter_offset()));
+	const Address backedge_counter  (T7, in_bytes(methodOopDesc::backedge_counter_offset()) 
+			+ in_bytes(InvocationCounter::counter_offset()));
+
+	if (ProfileInterpreter) { // %%% Merge this into methodDataOop
+		__ lw(AT, T7, in_bytes(methodOopDesc::interpreter_invocation_counter_offset()));
+		__ addiu(AT, AT, 1);
+		__ sw(AT, T7, in_bytes(methodOopDesc::interpreter_invocation_counter_offset()));
+	}
+	// Update standard invocation counters
+	__ lw(FSR, backedge_counter);
+
+	__ increment(T3, InvocationCounter::count_increment);
+	// a buffer bit was added, so this is no longer needed
+	// by yjl 10/24/2005
+	//__ move(AT, InvocationCounter::count_mask_value);
+	//__ andr(FSR, FSR, AT);
+
+	__ sw(T3, invocation_counter);
+	__ add(T3, T3, FSR);
+
+	// profile_method is non-null only for interpreted methods, so
+	// profile_method != NULL == !native_call
+
+	if (ProfileInterpreter && profile_method != NULL) {
+		// Test to see if we should create a method data oop
+		__ lui(AT, Assembler::split_high(
+					int(&InvocationCounter::InterpreterProfileLimit)));
+		__ lw(AT, AT, Assembler::split_low(
+					int(&InvocationCounter::InterpreterProfileLimit)));
+		__ slt(AT, T3, AT);
+		__ bne(AT, ZERO, *profile_method_continue);
+		__ delayed()->nop();
+
+		// if no method data exists, go to profile_method
+		__ test_method_data_pointer(FSR, *profile_method);
+	}
+
+	__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterInvocationLimit)));
+	__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterInvocationLimit)));
+	__ slt(AT, T3, AT);
+	__ beq(AT, ZERO, *overflow);
+	__ delayed()->nop();
+}
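+// Editor's sketch (hedged): the counter test emitted above can be read as the
+// following pseudocode; InvocationCounter::count_increment and
+// InterpreterInvocationLimit are the real HotSpot names, the rest is illustrative.
+//
+//   invocation_counter += count_increment;              // __ increment(T3, ...)
+//   total = invocation_counter + backedge_counter;      // __ add(T3, T3, FSR)
+//   if (!(total < InterpreterInvocationLimit))          // the slt / beq pair above
+//     goto *overflow;
+//
+// Since both counters only grow, once the sum reaches the limit the branch keeps
+// being taken on every invocation, which gives the "sticky" behaviour mentioned in
+// the comment before this function.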
+
+void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
+
+	// Asm interpreter on entry
+	// S7 - locals
+	// S0 - bcp
+	// T7 - method
+	// FP - interpreter frame
+
+	// On return (i.e. jump to entry_point)
+	// T7 - method
+	// RA - return address of interpreter caller
+	// tos - the last parameter to Java method 
+	// SP - sender_sp
+	const Address size_of_parameters(T7,in_bytes( methodOopDesc::size_of_parameters_offset()));
+
+	// the bcp is valid if and only if it's not null
+	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+		  InterpreterRuntime::frequency_counter_overflow), ZERO);
+	__ lw(T7, FP, method_offset);
+/*
+  // method has been compiled - remove activation frame
+  // (leave return address on stack) and continue at
+  // verified entry point (eax). (eax in some past life maybe, seems to use methodoop these days)
+  //
+  // Note: continuation at verified entry point works if the method that has been
+  //       compiled is the right one (in case of virtual calls); i.e., the inline
+  //       cache check must have happened before the invocation counter overflow
+  //       check.
+	__ lhu(V0, size_of_parameters);
+	__ move(SP, FP);
+	__ lw(FP, SP, frame::interpreter_frame_sender_fp_offset * wordSize);
+	__ lw(RA, SP, frame::interpreter_frame_return_addr_offset * wordSize);
+	__ sll(V0, V0, 2);
+	__ addi(V0, V0, - 1 * wordSize);
+	__ sub(SP, LVP, V0);
+//	__ lw(T0, LVP, 0);
+*/
+  // Preserve the invariant that BCP/LVP contain the bcp/locals of the sender frame
+	__ b(*do_continue);
+	__ delayed()->nop();
+}
+
+// See if we've got enough room on the stack for locals plus overhead.
+// The expression stack grows down incrementally, so the normal guard
+// page mechanism will work for that.
+//
+// NOTE: Since the additional locals are also always pushed (this wasn't
+// obvious in generate_method_entry), the guard should work for them
+// too.
+//
+// Args:
+//      T2: number of additional locals this frame needs (what we must check)
+//      T7: methodOop
+//
+// Kills:
+//      T1, T3, T4
+void InterpreterGenerator::generate_stack_overflow_check(void) {
+	// see if we've got enough room on the stack for locals plus overhead.
+	// the expression stack grows down incrementally, so the normal guard
+	// page mechanism will work for that.
+	//
+	// Registers live on entry:
+	//
+	// T2: number of additional locals this frame needs (what we must check)
+	// T7: methodOop
+
+	// destroyed on exit
+	// T1, T3, T4
+
+	// NOTE: since the additional locals are also always pushed (this wasn't obvious in
+	// generate_method_entry), the guard should work for them too.
+	//
+
+	// monitor entry size: see picture of stack set (generate_method_entry) and frame_mips.hpp
+	const int entry_size    = frame::interpreter_frame_monitor_size() * wordSize;
+
+	// total overhead size: entry_size + (saved fp through expr stack bottom).
+	// be sure to change this if you add/subtract anything to/from the overhead area
+	const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) 
+		+ entry_size;
+
+	const int page_size = os::vm_page_size();
+
+	Label after_frame_check;
+
+	// see if the frame is greater than one page in size. If so,
+	// then we need to verify there is enough stack space remaining
+	// for the additional locals.
+	__ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize());
+	__ slt(AT, AT, T2);
+	__ beq(AT, ZERO, after_frame_check);
+	__ delayed()->nop();
+
+	// compute sp as if this were going to be the last frame on
+	// the stack before the red zone
+#ifndef OPT_THREAD
+	Register thread = T1;
+	__ get_thread(thread);
+#else
+	Register thread = TREG;
+#endif
+
+	// locals + overhead, in bytes
+	__ sll(T4, T2, Interpreter::stackElementScale());
+	__ addiu(T4, T4, overhead_size); 	// locals * 4 + overhead_size --> T4
+
+#ifdef ASSERT
+	Label stack_base_okay, stack_size_okay;
+	// verify that thread stack base is non-zero
+	__ lw(T3, thread, in_bytes(Thread::stack_base_offset()));
+	__ bne(T3, ZERO, stack_base_okay);
+	__ delayed()->nop();
+	__ stop("stack base is zero");
+	__ bind(stack_base_okay);
+	// verify that thread stack size is non-zero
+	__ lw(T3, thread, in_bytes(Thread::stack_size_offset()));
+	__ bne(T3, ZERO, stack_size_okay);
+	__ delayed()->nop();
+	__ stop("stack size is zero");
+	__ bind(stack_size_okay);
+#endif
+
+	// Add stack base to locals and subtract stack size
+	__ lw(T3, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> T3
+	__ add(T4, T4, T3); 	// locals * 4 + overhead_size + stack_base--> T4
+	__ lw(T3, thread, in_bytes(Thread::stack_size_offset()));  // stack_size --> T3
+	__ sub(T4, T4, T3);	// locals * 4 + overhead_size + stack_base - stack_size --> T4
+
+
+	// add in the redzone and yellow size
+	__ move(AT, (StackRedPages+StackYellowPages) * page_size);
+	__ add(T4, T4, AT);
+
+	// check against the current stack bottom
+	__ slt(AT, T4, SP);
+	__ bne(AT, ZERO, after_frame_check);
+	__ delayed()->nop();
+	// the x86 version pops the saved bcp and return address here; FIXME
+	__ jmp(Interpreter::throw_StackOverflowError_entry(), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+
+	// all done with frame size check
+	__ bind(after_frame_check);
+}
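+// Editor's sketch (hedged): the limit computed into T4 above corresponds to the
+// following pseudocode; the helper name and signature are illustrative only.
+//
+//   bool frame_fits(char* sp, char* stack_base, size_t stack_size,
+//                   size_t locals_plus_overhead, size_t guard_bytes) {
+//     char* limit = stack_base - stack_size    // lowest address of the stack
+//                 + guard_bytes                // (StackRedPages + StackYellowPages) * page_size
+//                 + locals_plus_overhead;      // what this frame still needs
+//     return sp > limit;                       // the slt / bne pair above
+//   }
+//
+// If the test fails, control jumps to throw_StackOverflowError_entry instead of
+// building the frame.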
+
+// Allocate monitor and lock method (asm interpreter)
+// T7 - methodOop
+void InterpreterGenerator::lock_method(void) {
+  // synchronize method
+	const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+#ifdef ASSERT
+	{ Label L;
+		__ lw(T0, T7, in_bytes(methodOopDesc::access_flags_offset()));
+		__ andi(T0, T0, JVM_ACC_SYNCHRONIZED);
+		__ bne(T0, ZERO, L);
+		__ delayed()->nop();
+		__ stop("method doesn't need synchronization");
+		__ bind(L);
+	}
+#endif // ASSERT
+	// get synchronization object
+	{ Label done;
+		const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() 
+			+ Klass::java_mirror_offset_in_bytes();
+		__ lw(T0, T7, in_bytes(methodOopDesc::access_flags_offset()));
+		__ andi(T2, T0, JVM_ACC_STATIC);
+		__ lw(T0, LVP, Interpreter::local_offset_in_bytes(0));         
+		__ beq(T2, ZERO, done);
+		__ delayed()->nop();
+		__ lw(T0, T7, in_bytes(methodOopDesc::constants_offset()));
+		__ lw(T0, T0, constantPoolOopDesc::pool_holder_offset_in_bytes());
+		__ lw(T0, T0, mirror_offset);
+		__ bind(done);
+	}
+	// add space for monitor & lock
+	__ addi(SP, SP, (-1) * entry_size);           // add space for a monitor entry
+	__ sw(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);	
+	// set new monitor block top
+	__ sw(T0, SP, BasicObjectLock::obj_offset_in_bytes());   // store object
+	// FIXME: I do not know what lock_object will do and what it will need
+	__ move(T6, SP);      // object address
+	__ lock_object(T6);          
+}
+
+// Generate a fixed interpreter frame. This is identical setup for
+// interpreted methods and for native methods hence the shared code.
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+
+	// [ local var m-1      ] <--- sp
+	//   ...
+	// [ local var 0        ]				
+	// [ argument word n-1  ] <--- T0 (sender's sp)
+	//   ...
+	// [ argument word 0    ] <--- S7
+
+	// initialize fixed part of activation frame
+	// sender's sp in T5
+	int i = 0;
+	__ sw(RA, SP, (-1) * wordSize); 	// save return address
+	__ sw(FP, SP, (-2) * wordSize);	// save sender's fp
+	__ addiu(FP, SP, (-2) * wordSize);
+	//__ sw(T0, FP, (-++i) * wordSize);	// save sender's sp
+	__ sw(T5, FP, (-++i) * wordSize);	// save sender's sp
+	__ sw(ZERO,FP,(-++i)*wordSize);       //save last_sp as null, FIXME aoqi 
+	__ sw(LVP, FP, (-++i) * wordSize);	// save locals offset
+	__ lw(BCP, T7, in_bytes(methodOopDesc::const_offset())); // get constMethodOop
+	__ addiu(BCP, BCP, in_bytes(constMethodOopDesc::codes_offset())); // get codebase
+	__ sw(T7, FP, (-++i) * wordSize);                              // save methodOop
+#ifndef CORE
+	if (ProfileInterpreter) {
+		Label method_data_continue;
+		__ lw(AT, T7,  in_bytes(methodOopDesc::method_data_offset())); 
+		__ beq(AT, ZERO, method_data_continue); 
+		__ delayed()->nop(); 
+		__ addi(AT, AT, in_bytes(methodDataOopDesc::data_offset()));  
+		__ bind(method_data_continue);
+		__ sw(AT, FP,  (-++i) * wordSize); 
+	} else {
+		__ sw(ZERO, FP, (-++i) * wordSize);
+	}
+#endif // !CORE
+
+	__ lw(T2, T7, in_bytes(methodOopDesc::constants_offset()));
+	__ lw(T2, T2, constantPoolOopDesc::cache_offset_in_bytes());
+	__ sw(T2, FP, (-++i) * wordSize);                    // set constant pool cache
+	if (native_call) {
+		__ sw(ZERO, FP, (-++i) * wordSize);					// no bcp
+	} else {
+		__ sw(BCP, FP, (-++i) * wordSize);					// set bcp
+	}
+	__ addiu(SP, FP, (-++i) * wordSize);
+	__ sw(SP, FP, (-i) * wordSize);               // reserve word for pointer to expression stack bottom	
+}
+
+// End of helpers
+
+// Various method entries
+//------------------------------------------------------------------------------------------------------------------------
+//
+//
+
+// Call an accessor method (assuming it is resolved, otherwise drop
+// into vanilla (slow path) entry
+address InterpreterGenerator::generate_accessor_entry(void) {
+
+	// T7: methodOop
+	// V0: receiver (preserve for slow entry into asm interpreter)
+	//  T5: senderSP must be preserved for the slow path; set SP to it on the fast path
+
+	address entry_point = __ pc();
+	Label xreturn_path;
+	// do fastpath for resolved accessor methods
+	if (UseFastAccessorMethods) {
+		Label slow_path;
+		//	  __ cmpl(Address((int)SafepointSynchronize::address_of_state(),
+		//	relocInfo::none), SafepointSynchronize::_not_synchronized);
+		//	  __ jcc(Assembler::notEqual, slow_path);
+		__ move(T2, (int)SafepointSynchronize::address_of_state()); 
+		__ lw(AT,T2, 0);
+		__ addi(AT,AT,-(SafepointSynchronize::_not_synchronized));
+		__ bne(AT,ZERO,slow_path); 
+		__ delayed()->nop();	
+		// Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; 
+		// parameter size = 1
+		// Note: We can only use this code if the getfield has been resolved
+		//       and if we don't have a null-pointer exception => check for
+		//       these conditions first and use slow path if necessary.
+		// T7: method
+		// V0: receiver
+
+		// [ receiver  ] <-- sp
+		__ lw(T0, SP, 0);
+
+		// check if local 0 != NULL and read field
+		__ beq(T0, ZERO, slow_path);
+		__ delayed()->nop();
+		__ lw(T2, T7, in_bytes(methodOopDesc::constants_offset()));
+		// read first instruction word and extract bytecode @ 1 and index @ 2
+		__ lw(T3, T7, in_bytes(methodOopDesc::const_offset()));
+		__ lw(T3, T3, in_bytes(constMethodOopDesc::codes_offset()));
+		// Shift codes right to get the index on the right.
+		// The bytecode fetched looks like <index><0xb4><0x2a>
+		__ srl(T3, T3, 2*BitsPerByte);
+		// FIXME: maybe it's wrong
+		__ sll(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size())));
+		__ lw(T2, T2, constantPoolOopDesc::cache_offset_in_bytes());
+
+		// T0: local 0 eax
+		// T7: method ebx
+		// V0: receiver - do not destroy since it is needed for slow path! ecx
+		// ecx: scratch on x86 -- which register should be used instead?
+		// T6: scratch -- which register should be used instead?
+		// T3: constant pool cache index	edx
+		// T2: constant pool cache	edi
+		// esi: sender's sp (x86)
+		// T5: sender's sp
+		// check if getfield has been resolved and read constant pool cache entry
+		// check the validity of the cache entry by testing whether _indices field
+		// contains Bytecode::_getfield in b1 byte.
+		assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below");
+		//    __ movl(esi, 
+		//	    Address(edi, 
+		//		    edx, 
+		//		    Address::times_4, constantPoolCacheOopDesc::base_offset() 
+		//		    + ConstantPoolCacheEntry::indices_offset()));
+
+	
+		__ sll(T4, T3, Address::times_4);
+		__ move(T6, in_bytes(constantPoolCacheOopDesc::base_offset() 
+					+ ConstantPoolCacheEntry::indices_offset()));
+		__ add(T6, T4, T6);
+		__ add(T6, T6, T2);
+		__ lw(T6, T6, 0);
+		__ srl(T6, T6, 2*BitsPerByte);
+		__ andi(T6, T6, 0xFF);
+		__ addi(T6, T6, (-1) * Bytecodes::_getfield);
+		__ bne(T6, ZERO, slow_path);
+		__ delayed()->nop();
+
+		//    __ shrl(esi, 2*BitsPerByte);
+		//    __ andl(esi, 0xFF);
+		//    __ cmpl(esi, Bytecodes::_getfield);
+		//    __ jcc(Assembler::notEqual, slow_path);
+
+		// Note: constant pool entry is not valid before bytecode is resolved
+
+		//    __ movl(esi, 
+		//	    Address(edi, 
+		//		    edx, 
+		//		    Address::times_4, constantPoolCacheOopDesc::base_offset() 
+		//		    + ConstantPoolCacheEntry::f2_offset()));
+		__ move(T6, in_bytes(constantPoolCacheOopDesc::base_offset() 
+					+ ConstantPoolCacheEntry::f2_offset()));
+		__ add(T6, T6, T4);
+		__ add(T6, T6, T2);
+		__ lw(AT, T6, 0);
+		//    __ movl(edx, 
+		//	    Address(edi, 
+		//		    edx, 
+		//		    Address::times_4, constantPoolCacheOopDesc::base_offset() 
+		//		    + ConstantPoolCacheEntry::flags_offset()));
+
+
+		__ move(T6, in_bytes(constantPoolCacheOopDesc::base_offset() 
+					+ ConstantPoolCacheEntry::flags_offset()));
+		__ add(T6, T6, T4);
+		__ add(T6, T6, T2);
+		__ lw(T3, T6, 0);
+
+		Label notByte, notShort, notChar;
+		//    const Address field_address (eax, esi, Address::times_1);
+
+		// Need to differentiate between igetfield, agetfield, bgetfield etc.
+		// because they are different sizes.
+		// Use the type from the constant pool cache
+		__ srl(T3, T3, ConstantPoolCacheEntry::tosBits);
+		// Make sure we don't need to mask T3 for tosBits after the above shift
+		ConstantPoolCacheEntry::verify_tosBits();
+		// btos = 0
+		__ bne(T3, ZERO, notByte);
+		__ delayed()->add(T0, T0, AT);
+
+		__ lb(V0, T0, 0);
+		__ b(xreturn_path);
+		__ delayed()->nop();
+
+		__ bind(notByte);
+		__ addi(T6, T3, (-1) * stos);
+		__ bne(T6, ZERO, notShort);
+		__ delayed()->nop();
+		__ lh(V0, T0, 0);
+		__ b(xreturn_path);
+		__ delayed()->nop();
+
+		__ bind(notShort);
+		__ addi(T6, T3, (-1) * ctos);
+		__ bne(T6, ZERO, notChar);
+		__ delayed()->nop();
+		__ lhu(V0, T0, 0);
+		__ b(xreturn_path);
+		__ delayed()->nop();
+
+		__ bind(notChar);
+#ifdef ASSERT
+		Label okay;
+		__ addi(T6, T3, (-1) * atos);
+		__ beq(T6, ZERO, okay);
+		__ delayed()->addi(T6, T3, (-1) * itos);
+		__ beq(T6, ZERO, okay);
+		__ delayed()->nop();
+		__ stop("what type is this?");
+		__ bind(okay);
+#endif // ASSERT
+		// All other types are one 32-bit word in size
+		__ lw(V0, T0, 0);
+
+		__ bind(xreturn_path);
+
+		// _ireturn/_areturn
+		//FIXME 
+		__ move(SP, T5);	// FIXME: set SP to the sender's sp (T5)
+		__ jr(RA);
+		__ delayed()->nop();
+
+		// generate a vanilla interpreter entry as the slow path
+		__ bind(slow_path);
+		(void) generate_normal_entry(false);
+	} else {
+		(void) generate_normal_entry(false);
+	}
+
+	return entry_point;
+}
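+// Editor's sketch (hedged): the fast path generated above is roughly
+//
+//   if (safepoint_state == _not_synchronized
+//       && receiver != NULL
+//       && cache_entry_b1_bytecode == Bytecodes::_getfield) {
+//     V0 = *(field_type*)(receiver + f2_offset_from_cache);  // no frame is built
+//     SP = T5;                                               // pop back to sender's sp
+//     return;                                                // jr RA
+//   }
+//   // otherwise fall through into generate_normal_entry(false)
+//
+// cache_entry_b1_bytecode and f2_offset_from_cache are illustrative names for the
+// constant-pool-cache checks done with T6 / AT above; field_type is selected by the
+// tosBits test (byte, short, char, or word).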
+
+// Interpreter stub for calling a native method. (asm interpreter)
+// This sets up a somewhat different looking stack for calling the
+// native method than the typical interpreter frame setup.
+address InterpreterGenerator::generate_native_entry(bool synchronized) {
+  // determine code generation flags
+  bool inc_counter  = UseCompiler || CountCompiledCalls;
+	// esi: sender's sp (x86)
+	// T5: sender's sp
+	// T7: methodOop
+	address entry_point = __ pc();
+
+#ifndef CORE
+	const Address invocation_counter(T7,in_bytes(methodOopDesc::invocation_counter_offset() + 
+				InvocationCounter::counter_offset()));
+#endif
+
+	// get parameter size (always needed)
+	// the size in the java stack
+	__ lhu(V0, T7, in_bytes(methodOopDesc::size_of_parameters_offset()));
+
+	// native calls don't need the stack size check: they have no expression stack,
+	// the arguments are already on the stack, and we only add a handful of words
+	// to the stack
+
+	// T7: methodOop
+	// V0: size of parameters
+	// Layout of frame at this point
+	//
+	// [ argument word n-1  ] <--- sp
+	//   ...
+	// [ argument word 0    ]
+
+	// for natives the size of locals is zero
+
+	// compute beginning of parameters (S7)
+	__ sll(LVP, V0, Interpreter::stackElementScale());
+	__ addiu(LVP, LVP, (-1) * wordSize);
+	__ add(LVP, LVP, SP);
+
+	//__ move(T0, SP);               // remember sender sp for generate_fixed_frame
+
+
+	// add 2 zero-initialized slots for native calls
+	__ addi(SP, SP, (-2) * wordSize);
+	__ sw(ZERO, SP, 1 * wordSize);	// slot for native oop temp offset (setup via runtime)
+	__ sw(ZERO, SP, 0 * wordSize);	// slot for static native result handler3 (setup via runtime)
+
+	// Layout of frame at this point
+	// [ method holder mirror	] <--- sp
+	// [ result type info			] 
+	// [ argument word n-1   	] <--- T0
+	//   ...
+	// [ argument word 0    	] <--- LVP
+
+
+#ifndef CORE
+	if (inc_counter) __ lw(T3, invocation_counter);  // (pre-)fetch invocation count
+#endif
+
+	// initialize fixed part of activation frame
+	generate_fixed_frame(true);
+        // jerome_for_debug
+	//__ move(AT, (int)(&jerome1)); 
+	//__ sw(FP, AT, 0);  
+	// after this function, the layout of frame is as following
+	//
+	// [ monitor block top        ] <--- sp ( the top monitor entry )
+	// [ byte code pointer (0)    ] (if native, bcp = 0)
+	// [ constant pool cache      ]
+	// [ methodOop                ]
+	// [ locals offset            ]
+	// [ sender's sp              ]
+	// [ sender's fp              ]
+	// [ return address           ] <--- fp
+	// [ method holder mirror     ]
+	// [ result type info         ]
+	// [ argument word n-1        ] <--- sender's sp
+	//	 ...
+	// [ argument word 0          ] <--- S7
+
+
+	// make sure method is native & not abstract
+#ifdef ASSERT
+	__ lw(T0, T7, in_bytes(methodOopDesc::access_flags_offset()));
+	{
+		Label L;
+		__ andi(T2, T0, JVM_ACC_NATIVE);
+		__ bne(T2, ZERO, L);
+		__ delayed()->nop();
+		__ stop("tried to execute native method as non-native");
+		__ bind(L);
+	}
+	{ Label L;
+		__ andi(T2, T0, JVM_ACC_ABSTRACT);
+		__ beq(T2, ZERO, L);
+		__ delayed()->nop();
+		__ stop("tried to execute abstract method in interpreter");
+		__ bind(L);
+	}
+#endif
+
+	// Since at this point in the method invocation the exception handler
+	// would try to exit the monitor of synchronized methods which hasn't
+	// been entered yet, we set the thread local variable
+	// _do_not_unlock_if_synchronized to true. The remove_activation will
+	// check this flag.
+	Register thread = TREG;
+#ifndef OPT_THREAD
+	__ get_thread(thread);
+#endif
+	__ move(AT, (int)true);
+	__ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+
+#ifndef CORE
+	// increment invocation count & check for overflow
+	Label invocation_counter_overflow;
+	if (inc_counter) {
+		generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
+	}
+	Label continue_after_compile;
+	__ bind(continue_after_compile);
+#endif // CORE
+
+	bang_stack_shadow_pages(true);
+
+	// reset the _do_not_unlock_if_synchronized flag
+#ifndef OPT_THREAD
+	__ get_thread(thread);
+#endif
+	__ sb(ZERO, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+
+	// check for synchronized methods
+	// Must happen AFTER invocation_counter check and stack overflow check,
+	// so the method is not locked if the counter overflows.
+	if (synchronized) {
+		lock_method();
+	} else {
+		// no synchronization necessary
+#ifdef ASSERT
+		{
+			Label L;
+			__ lw(T0, T7, in_bytes(methodOopDesc::access_flags_offset()));
+			__ andi(T2, T0, JVM_ACC_SYNCHRONIZED);
+			__ beq(T2, ZERO, L);
+			__ delayed()->nop();
+			__ stop("method needs synchronization");
+			__ bind(L);
+		}
+#endif
+	}
+
+	// after method_lock, the layout of frame is as following
+	//
+	// [ monitor entry            ] <--- sp
+	//   ...
+	// [ monitor entry            ]
+	// [ monitor block top        ] ( the top monitor entry ) 
+	// [ byte code pointer (0)    ] (if native, bcp = 0)
+	// [ constant pool cache      ]
+	// [ methodOop                ]
+	// [ locals offset	      ]
+	// [ sender's sp              ]
+	// [ sender's fp              ]
+	// [ return address           ] <--- fp
+	// [ method holder mirror     ]
+	// [ result type info         ]
+	// [ argument word n-1        ] <--- ( sender's sp )
+	//	 ...
+	// [ argument word 0          ] <--- S7
+
+	// start execution
+#ifdef ASSERT
+	{ Label L;
+		__ lw(T0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+		__ beq(T0, SP, L);
+		__ delayed()->nop();
+		__ stop("broken stack frame setup in interpreter in asm");
+		__ bind(L);
+	}
+#endif
+
+	// jvmti/jvmpi support
+	__ notify_method_entry();
+
+	// work registers
+	const Register method = T7;
+	//const Register thread = T2;
+	const Register t      = T3;    
+
+	// allocate space for parameters
+	__ get_method(method);
+
+	__ verify_oop(method);
+/*
+	// jerome_for_debug
+	__ move(AT, (int)(&jerome10)); 
+	__ sw(SP, AT, 0);  
+	 // jerome_for_debug
+	__ move(AT, (int)(&jerome9)); 
+	__ sw(ZERO, AT, 0);  
+*/	
+	__ lhu(t, method, in_bytes(methodOopDesc::size_of_parameters_offset()));
+	// FIXME: to align long/double parameters, we reserve as much as
+	// twice the size actually needed
+	// FIXME: jdk6 allocates 2 more words here, @jerome
+	//	__ shl(t, Interpreter::logStackElementSize());
+/* 
+	// jerome_for_debug
+	__ move(AT, (int)(&jerome9)); 
+	__ sw(t, AT, 0);  
+*/	
+	__ shl(t, 3);
+	__ addi(t,t,2*wordSize);//for JNIEnv and mirror	
+	__ sub(SP, SP, t);
+	__ move(AT, -8);
+	__ andr(SP, SP, AT);	
+	// [			      ] <--- sp
+	//   ...                        size of parameters
+	// [ monitor entry            ] 
+	//   ...
+	// [ monitor entry            ]
+	// [ monitor block top        ] ( the top monitor entry ) 
+	// [ byte code pointer (0)    ] (if native, bcp = 0)
+	// [ constant pool cache      ]
+	// [ methodOop                ]
+	// [ locals offset            ]
+	// [ sender's sp              ]
+	// [ sender's fp              ]
+	// [ return address           ] <--- fp
+	// [ method holder mirror     ]
+	// [ result type info         ]
+	// [ argument word n-1        ] <--- ( sender's sp )
+	//	 ...
+	// [ argument word 0          ] <--- LVP
+
+	// get signature handler
+	{ Label L;
+		__ lw(T9, method, in_bytes(methodOopDesc::signature_handler_offset()));
+		__ bne(T9, ZERO, L);
+		__ delayed()->nop();
+		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+					InterpreterRuntime::prepare_native_call), method);
+		__ get_method(method);
+		__ lw(T9, method, in_bytes(methodOopDesc::signature_handler_offset()));
+		__ bind(L);
+	}
+
+	// call signature handler
+	// FIXME: when change codes in InterpreterRuntime, note this point
+	// from: begin of parameters
+	assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code");
+	// to: current sp
+	assert(InterpreterRuntime::SignatureHandlerGenerator::to  () == SP, "adjust this code");
+	// temp: T3
+	assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t  , "adjust this code");
+
+	__ jalr(T9);
+	__ delayed()->nop();
+	__ get_method(method);	// reload method; the slow path call may clobber it (x86 note: blows EBX on DevStudio 5.0)
+
+	/*
+	   If the native function is static and its second parameter is a
+	   double-word type while its first parameter is a single-word type, we
+	   have to reserve one word for the first parameter, according to the
+	   MIPS o32 ABI. Likewise, if the native function is not static and its
+	   third parameter is a double-word type while its second parameter is a
+	   single-word type, we have to reserve one word for the second parameter.
+	 */
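+	// A sketch of the rule above (hypothetical signature, not taken from this
+	// change): for `static native void m(int a, long b)` the C call is
+	// m(JNIEnv*, jclass, jint, jlong). Under o32 the jlong must start on an
+	// 8-byte boundary / even argument slot, so the slot after `a` is skipped
+	// and one extra word is reserved:
+	//   arg0: JNIEnv*  arg1: jclass  arg2: a  arg3: (reserved)  arg4/5: b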
+
+
+	// result handler is in V0
+	// set result handler
+	__ sw(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize);
+
+#define FIRSTPARA_SHIFT_COUNT 5
+#define SECONDPARA_SHIFT_COUNT 9
+#define THIRDPARA_SHIFT_COUNT 13
+#define PARA_MASK	0xf
+
+	// pass mirror handle if static call
+	{ 
+		Label L;
+		const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() 
+			+ Klass::java_mirror_offset_in_bytes();
+		__ lw(t, method, in_bytes(methodOopDesc::access_flags_offset()));
+		__ andi(t, t, JVM_ACC_STATIC);
+		__ beq(t, ZERO, L);
+		__ delayed()->nop();
+
+		// get mirror
+		__ lw(t, method, in_bytes(methodOopDesc:: constants_offset()));
+		__ lw(t, t, constantPoolOopDesc::pool_holder_offset_in_bytes());
+		__ lw(t, t, mirror_offset);
+		// copy mirror into activation frame
+		//__ sw(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize);
+		// pass handle to mirror
+		__ st_ptr(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize);
+		__ addi(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize);
+//		__ ld_ptr(t,Address(SP ,wordSize));		
+		__ st_ptr(t, Address(SP, wordSize));
+		__ move(A1, t);
+		__ bind(L);
+	}
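+	// Note on the handle above (general JNI behavior, illustrated with a
+	// hypothetical method, not one from this change): a static native method
+	// receives a jclass, and JNI references are passed as handles, i.e.
+	// addresses of GC-visible slots holding the oop, not the oop itself.
+	// So the mirror is parked in the interpreter_frame_oop_temp slot and A1
+	// gets the address of that slot. For `static native void bar()` in a
+	// class Foo, the callee would see roughly:
+	//   JNIEXPORT void JNICALL Java_Foo_bar(JNIEnv* env, jclass clazz) {
+	//     // clazz == (jclass) address of the interpreter_frame_oop_temp slot
+	//   }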
+
+	// [ mthd holder mirror ptr   ] <--- sp  --------------------| (only for static method)
+	// [                          ]                              |
+	//   ...                        size of parameters(or +1)    |
+	// [ monitor entry            ]                              |
+	//   ...                                                     |
+	// [ monitor entry            ]                              |
+	// [ monitor block top        ] ( the top monitor entry )    |
+	// [ byte code pointer (0)    ] (if native, bcp = 0)         |
+	// [ constant pool cache      ]                              |
+	// [ methodOop                ]                              |
+	// [ locals offset            ]                              |
+	// [ sender's sp              ]                              |
+	// [ sender's fp              ]                              |
+	// [ return address           ] <--- fp                      |
+	// [ method holder mirror     ] <----------------------------|                             
+	// [ result type info         ]
+	// [ argument word n-1        ] <--- ( sender's sp )
+	//	 ...
+	// [ argument word 0          ] <--- S7
+
+	// get native function entry point
+	{ Label L;
+		__ lw(T9, method, in_bytes(methodOopDesc::native_function_offset()));
+		__ move(V1, (uintptr_t) SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
+		__ bne(V1, T9, L);
+		__ delayed()->nop();
+		__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method);
+		__ get_method(method);
+		__ verify_oop(method);
+		__ lw(T9, method, in_bytes(methodOopDesc::native_function_offset()));
+		__ bind(L);
+	}
+
+	// pass JNIEnv
+	// native function in T9
+#ifndef OPT_THREAD
+	__ get_thread(thread);
+#endif
+	__ addi(t, thread, in_bytes(JavaThread::jni_environment_offset()));
+	//__ addi(SP, SP, (-1) * wordSize);
+        //__ sw(t, SP, 0);
+	// stack, but I think it won't work when passing float, double etc. @jerome, 10/17/2006
+	__ move(A0, t);
+	// [ jni environment          ] <--- sp
+	// [ mthd holder mirror ptr   ] ---------------------------->| (only for static method)
+	// [                          ]                              |
+	//   ...                        size of parameters           |
+	// [ monitor entry            ]                              |
+	//   ...                                                     |
+	// [ monitor entry            ]                              |
+	// [ monitor block top        ] ( the top monitor entry )    |
+	// [ byte code pointer (0)    ] (if native, bcp = 0)         |
+	// [ constant pool cache      ]                              |
+	// [ methodOop                ]                              |
+	// [ locals offset            ]                              |
+	// [ sender's sp              ]                              |
+	// [ sender's fp              ]                              |
+	// [ return address           ] <--- fp                      |
+	// [ method holder mirror     ] <----------------------------|                             
+	// [ result type info         ]
+	// [ argument word n-1        ] <--- ( sender's sp )
+	//	 ...
+	// [ argument word 0          ] <--- S7
+
+	/*
+	// reset handle block
+	__ lw(t, thread, in_bytes(JavaThread::active_handles_offset()));
+	__ sw(ZERO, t, JNIHandleBlock::top_offset_in_bytes());
+
+        */
+	// set_last_Java_frame_before_call
+	__ sw(FP, thread, in_bytes(JavaThread::last_Java_fp_offset()));
+	 //set_last_Java_frame_before_call
+	// It is enough that the pc()
+	// points into the right code segment. It does not have to be the correct return pc.
+	//__ set_last_Java_frame(thread, noreg, FP, __ pc());
+	// change thread state
+#ifdef ASSERT
+	{ Label L;
+		__ lw(t, thread, in_bytes(JavaThread::thread_state_offset()));
+		__ addi(t, t, (-1) * _thread_in_Java);
+		__ beq(t, ZERO, L);
+		__ delayed()->nop();
+		__ stop("Wrong thread state in native stub");
+		__ bind(L);
+	}
+#endif
+
+	// Change state to native (we save the return address in the thread, since it might not
+	// be pushed on the stack when we do a stack traversal). It is enough that the pc()
+	// points into the right code segment. It does not have to be the correct return pc.
+	__ move(t, (int) __ pc());
+//	__ sw(t, thread, in_bytes(JavaThread::frame_anchor_offset() 
+//			+ JavaFrameAnchor::last_Java_pc_offset()));
+	__ sw(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); 
+	__ sw(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
+	
+	__ move(t, _thread_in_native);
+	__ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
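+
+	// The thread-state transitions performed around the native call, as a
+	// sketch (standard HotSpot state machine; the stores use the offsets
+	// referenced above and below):
+	//   _thread_in_Java
+	//     -> _thread_in_native        (here, just before the jalr to T9)
+	//     -> _thread_in_native_trans  (after the native call returns)
+	//     -> [safepoint / suspend check]
+	//     -> _thread_in_Java          (back to interpreted code)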
+
+	// FIXME: the parameter format has not been determined;
+	// it will be wrong when passing float/double parameters @jerome, 10/17/2006
+
+//	__ warn("get signature handler");
+	__ lw(A1, SP, 1 * wordSize);
+	__ lw(A2, SP, 2 * wordSize);
+	__ lw(A3, SP, 3 * wordSize);
+
+	// call native method
+	__ jalr(T9);
+	__ delayed()->nop();
+	// result potentially in V0:V1 or F0:F1
+	/*
+	   __ get_method(method);    
+#ifndef OPT_THREAD
+	__ get_thread(thread);
+#endif
+        */
+//#ifdef COMPILER2
+//#endif
+//jerome_for_debug 
+	//__ move(AT, (int)(&jerome2)); 
+	//__ sw(FP, AT, 0);  
+
+
+	if (CheckJNICalls) {
+		//FIXME	
+		//	 __ call(StubRoutines::gs2::verify_fpu_cntrl_wrd_entry(), 
+		//	 relocInfo::runtime_call_type);
+	}
+
+	// restore S0 to have legal interpreter frame, i.e., bci == 0 <=> S0 == code_base()
+	//__ lw(BCP, method, in_bytes(methodOopDesc::const_offset())); // get constMethodOop
+	//__ addi(BCP, BCP, in_bytes(constMethodOopDesc::codes_offset()));    // get codebase
+
+	// via _last_native_pc and not via _last_java_sp
+	// NOTE: the order of these pushes is known to frame::interpreter_frame_result.
+	// If the order changes or anything else is added to the stack, the code in
+	// interpreter_frame_result will have to be changed.
+	//FIXME, should modify here
+	// save return value to keep the value from being destroyed by other calls
+	//__ addi(SP, SP, (-4) * wordSize);
+	//__ sw(V0, SP, 3 * wordSize);
+	//__ sw(V1, SP, 2 * wordSize);
+	//__ swc1(F0, SP, 1 * wordSize);
+	//__ swc1(F1, SP, 0 * wordSize);
+	__ move(S1, V0);
+	__ move(S3, V1);
+	__ mfc1(S4, F0);
+	__ mfc1(S5, F1);
+
+	// change thread state
+	__ get_thread(thread); 
+	__ move(t, _thread_in_native_trans);
+	__ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
+
+	if( os::is_MP() ) __ sync(); // Force this write out before the read below
+
+	// check for safepoint operation in progress and/or pending suspend requests
+	{ Label Continue;
+
+		// Don't use call_VM as it will see a possible pending exception and forward it
+		// and never return here preventing us from clearing _last_native_pc down below.
+		// Also can't use call_VM_leaf either as it will check to see if esi & edi are
+		// preserved and correspond to the bcp/locals pointers. So we do a runtime call
+		// by hand.
+		//
+		Label L;
+		__ move(T4, (int)SafepointSynchronize::address_of_state());
+		__ lw(T0, T4, 0);
+		__ bne(T0, ZERO, L);
+		__ delayed()->nop();
+		__ lw(T0, thread, in_bytes(JavaThread::suspend_flags_offset()));
+		__ beq(T0, ZERO, Continue);
+		__ delayed()->nop();
+		__ bind(L);
+		__ addi(SP, SP, (-1) * wordSize);
+		__ move(A0, thread);
+		__ call(CAST_FROM_FN_PTR(address, 
+		             JavaThread::check_special_condition_for_native_trans), 
+				  relocInfo::runtime_call_type);
+		__ delayed()->nop();
+		__ addi(SP, SP, wordSize);
+
+	//	__ get_method(method);
+#ifndef OPT_THREAD
+		__ get_thread(thread);
+#endif
+
+		__ bind(Continue);
+	}
+
+	// change thread state
+	__ move(t, _thread_in_Java);
+	__ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
+	//__ reset_last_Java_frame(thread, true);
+	__ reset_last_Java_frame(thread, true,true);
+	// reset handle block
+	//  __ movl(t, Address(thread, JavaThread::active_handles_offset()));
+
+	__ lw(t,thread, in_bytes(JavaThread::active_handles_offset())); 
+	// __ movl(Address(t, JNIHandleBlock::top_offset_in_bytes()), 0);
+	__ sw(ZERO,t, JNIHandleBlock::top_offset_in_bytes());
+	// If result was an oop then unbox and save it in the frame
+	{ Label L;
+		Label no_oop, store_result;
+		// FIXME: addi only supports a 16-bit immediate
+		__ lw(AT,FP, frame::interpreter_frame_result_handler_offset*wordSize); 
+		// __ addi(AT,AT,-(int)AbstractInterpreter::result_handler(T_OBJECT)); 
+		__ move(T6, (int)AbstractInterpreter::result_handler(T_OBJECT)); 
+		__ bne(AT,T6,no_oop); 
+		__ delayed()->nop(); 
+		//__ cmpl(Address(esp), NULL_WORD);
+		//FIXME, do we need pop here ? @jerome	
+		//__ pop(ltos);
+		//__ testl(eax, eax);
+		//__ jcc(Assembler::zero, store_result);
+		__ move(V0, S1);	
+		__ beq(V0,ZERO,store_result); 
+		__ delayed()->nop();	
+		// unbox
+		__ lw(V0,V0, 0); 
+		__ bind(store_result);
+		__ sw(V0,FP, (frame::interpreter_frame_oop_temp_offset)*wordSize);  
+		// keep stack depth as expected by pushing oop which will eventually be discarded
+		__ bind(no_oop);
+	}
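+	// Sketch of the unboxing above: a JNI object result is a handle, i.e. a
+	// pointer to a slot containing the oop (or NULL for the null reference).
+	// In C terms the sequence is roughly:
+	//   oop result = (handle == NULL) ? NULL : *(oop*)handle;
+	//   *(oop*)(FP + oop_temp_offset) = result;  // keep it visible to GC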
+	{
+		Label no_reguard;
+		__ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset()));
+		//__ bne(t, JavaThread::stack_guard_yellow_disabled, no_reguard);
+		__ move(AT,(int) JavaThread::stack_guard_yellow_disabled);	
+		__ bne(t, AT, no_reguard);
+		__ delayed()->nop();
+		__ pushad();	
+		__ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 
+				relocInfo::runtime_call_type);
+		__ delayed()->nop();
+		__ popad();	
+		__ bind(no_reguard);
+	}
+	// restore BCP to have a legal interpreter frame,
+	// i.e., bci == 0 <=> BCP == code_base()
+	// Can't call_VM until bcp points into a reasonable range.
+	__ get_method(method);      // method is junk from thread_in_native to now.
+	__ verify_oop(method);
+	//  __ movl(esi, Address(method,methodOopDesc::const_offset())); // get constMethodOop
+	__ lw(BCP,method,in_bytes(methodOopDesc::const_offset())); 
+	// __ leal(esi, Address(esi,constMethodOopDesc::codes_offset()));    // get codebase
+	__ lea(BCP,Address(BCP, in_bytes(constMethodOopDesc::codes_offset())));
+	// handle exceptions (exception handling will handle unlocking!)
+	{ 
+		Label L;
+		__ lw(t, thread, in_bytes(Thread::pending_exception_offset()));
+		__ beq(t, ZERO, L);
+		__ delayed()->nop();
+		// Note: At some point we may want to unify this with the code used in 
+		// call_VM_base();
+		// i.e., we should use the StubRoutines::forward_exception code. For now this
+		// doesn't work here because the esp is not correctly set at this point.
+		__ MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, 
+					InterpreterRuntime::throw_pending_exception));
+		__ should_not_reach_here();
+		__ bind(L);
+	}
+
+	// do unlocking if necessary
+	{ Label L;
+		__ lw(t, method, in_bytes(methodOopDesc::access_flags_offset()));
+		__ andi(t, t, JVM_ACC_SYNCHRONIZED);
+		__ beq(t, ZERO, L);
+		// the code below should be shared with interpreter macro assembler implementation
+		{ Label unlock;
+			// BasicObjectLock will be first in list,
+			// since this is a synchronized method. However, need
+			// to check that the object has not been unlocked by 
+			// an explicit monitorexit bytecode.        
+			__ delayed()->addi(T6, FP, frame::interpreter_frame_initial_sp_offset 
+					* wordSize - (int)sizeof(BasicObjectLock));
+			// address of first monitor
+
+			__ lw(t, T6, BasicObjectLock::obj_offset_in_bytes());
+			__ bne(t, ZERO, unlock);
+			__ delayed()->nop();
+
+			// Entry already unlocked, need to throw exception
+			__ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+				InterpreterRuntime::throw_illegal_monitor_state_exception));
+			__ should_not_reach_here();
+
+			__ bind(unlock);        
+			__ unlock_object(T6);             
+		}
+		__ bind(L);
+	}    
+
+	// jvmti/jvmpi support
+	// Note: This must happen _after_ handling/throwing any exceptions since
+	//       the exception handler code notifies the runtime of method exits
+	//       too. If this happens before, method entry/exit notifications are
+	//       not properly paired (was bug - gri 11/22/99).
+	__ notify_method_exit(false, vtos, InterpreterMacroAssembler::NotifyJVMTI );
+
+	// restore potential result in V0:V1, 
+	// call result handler to restore potential result in ST0 & handle result
+	//__ lw(V0, SP, 3 * wordSize);
+	//__ lw(V1, SP, 2 * wordSize);
+	//__ lwc1(F0, SP, 1 * wordSize);
+	//__ lwc1(F1, SP, 0 * wordSize);
+	//__ addi(SP, SP, 4 * wordSize);
+	__ move(V0, S1);
+	__ move(V1, S3);
+	__ mtc1(S4, F0);
+	__ mtc1(S5, F1);
+	__ lw(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize);
+	__ jalr(t);
+	__ delayed()->nop();
+//jerome_for_debug 
+	//__ move(AT, (int)(&jerome4)); 
+	//__ sw(FP, AT, 0);  
+
+
+	// remove activation
+	__ lw(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp
+	__ lw(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address
+	__ lw(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp
+	__ jr(RA);
+	__ delayed()->nop();
+
+#ifndef CORE
+	if (inc_counter) {
+		// Handle overflow of counter and compile method
+		__ bind(invocation_counter_overflow);
+		generate_counter_overflow(&continue_after_compile);
+		// entry_point is the beginning of this
+		// function and checks again for compiled code
+	}
+#endif
+	return entry_point;
+}
+
+//
+// Generic interpreted method entry to (asm) interpreter
+//
+// Layout of frame just at the entry
+//
+//   [ argument word n-1	] <--- sp
+//     ...
+//   [ argument word 0  	]
+// assumes the methodOop is in T7 before calling this method.
+// prerequisite to the generated stub: the callee methodOop in T7
+// note: you must save the caller's bcp before calling the generated stub
+//
+address InterpreterGenerator::generate_normal_entry(bool synchronized) {
+  // determine code generation flags
+  bool inc_counter  = UseCompiler || CountCompiledCalls;
+
+	// T7: methodOop
+	// T5: sender's sp
+	address entry_point = __ pc();
+/*
+#ifndef CORE
+	// check if compiled code exists
+	Label run_compiled_code;
+	if (!CompileTheWorld) {
+	check_for_compiled_code(run_compiled_code);
+	}
+#endif
+*/
+#ifndef CORE
+	const Address invocation_counter(T7, 
+	in_bytes(methodOopDesc::invocation_counter_offset() + InvocationCounter::counter_offset()));
+#endif
+
+	// get parameter size (always needed)
+	__ lhu(V0, T7, in_bytes(methodOopDesc::size_of_parameters_offset()));
+
+	// T7: methodOop
+	// V0: size of parameters
+	// T5: sender's sp, which could differ from sp + wordSize if we were called via c2i
+	// get size of locals in words to T2
+	__ lhu(T2, T7, in_bytes(methodOopDesc::size_of_locals_offset()));       	
+	// T2 = no. of additional locals, locals include parameters
+	__ sub(T2, T2, V0);                                
+
+	// see if we've got enough room on the stack for locals plus overhead.
+	// Layout of frame at this point
+	//
+	// [ argument word n-1  ] <--- sp
+	//   ...
+	// [ argument word 0  	]
+	generate_stack_overflow_check();
+	// after this function, the layout of frame does not change
+
+	// compute beginning of parameters (S7)
+	__ sll(LVP, V0, Interpreter::stackElementScale());
+	__ addiu(LVP, LVP, (-1) * wordSize);
+	__ add(LVP, LVP, SP);
+	// remember current sp
+	//__ move(T0, SP);		// SP --> T0
+
+	// T2 - # of additional locals
+	// allocate space for locals
+	// explicitly initialize locals
+	{
+		Label exit, loop;
+		// for test
+	//	__ slt(AT, ZERO, T2);
+	//	__ beq(AT, ZERO, exit);
+		__ beq(T2, ZERO, exit);
+		__ delayed()->nop();
+		__ bind(loop);
+		if(TaggedStackInterpreter)__ addi(SP, SP, -1 * wordSize);  
+		__ sw(ZERO, SP, -1 * wordSize);     // initialize local variables
+		__ addiu(T2, T2, -1);               // until everything initialized
+		__ bne(T2, ZERO, loop);
+	//	__ slt(AT, ZERO, T2);
+	//	__ bne(AT, ZERO, loop);
+		__ delayed();
+		__ addiu(SP, SP, (-1) * wordSize); //fill delay slot
+		__ bind(exit);
+	}
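+	// In effect the loop above does the following (a sketch that ignores the
+	// delay-slot scheduling): for each of the T2 additional locals, push one
+	// zeroed word onto the stack (plus one extra word per local when
+	// TaggedStackInterpreter is enabled):
+	//   while (T2-- > 0) { *--SP = 0; }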
+
+#ifndef CORE
+	if (inc_counter) __ lw(T3, invocation_counter);  // (pre-)fetch invocation count
+#endif
+	// 				
+	// [ local var m-1	] <--- sp
+	//   ...
+	// [ local var 0	]
+	// [ argument word n-1	] <--- T0
+	//   ...
+	// [ argument word 0  	] <--- S7
+
+	// initialize fixed part of activation frame
+
+	generate_fixed_frame(false);
+
+
+	// after this function, the layout of the frame is as follows
+	//
+	// [ monitor block top        ] <--- sp ( the top monitor entry )
+	// [ byte code pointer        ] (if native, bcp = 0)
+	// [ constant pool cache      ]
+	// [ methodOop                ]
+	// [ locals offset            ]
+	// [ sender's sp              ]
+	// [ sender's fp              ]
+	// [ return address           ] <--- fp
+	// [ local var m-1            ]
+	//   ...
+	// [ local var 0              ]
+	// [ argument word n-1        ] <--- ( sender's sp )
+	//   ...
+	// [ argument word 0          ] <--- S7
+
+
+	// make sure method is not native & not abstract
+#ifdef ASSERT
+	__ lw(T0, T7, in_bytes(methodOopDesc::access_flags_offset()));
+	{
+		Label L;
+		__ andi(T2, T0, JVM_ACC_NATIVE);
+		__ beq(T2, ZERO, L);
+		__ delayed()->nop();
+		__ stop("tried to execute native method as non-native");
+		__ bind(L);
+	}
+	{ Label L;
+		__ andi(T2, T0, JVM_ACC_ABSTRACT);
+		__ beq(T2, ZERO, L);
+		__ delayed()->nop();
+		__ stop("tried to execute abstract method in interpreter");
+		__ bind(L);
+	}
+#endif
+
+  // Since at this point in the method invocation the exception handler
+  // would try to exit the monitor of synchronized methods which hasn't
+  // been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. The remove_activation will
+  // check this flag.
+
+#ifndef OPT_THREAD
+	Register thread = T0;
+	__ get_thread(T0);
+#else
+	Register thread = TREG;
+#endif
+	__ move(AT, (int)true);
+	__ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+
+#ifndef CORE
+	// increment invocation count & check for overflow
+	Label invocation_counter_overflow;
+	Label profile_method;
+	Label profile_method_continue;
+	if (inc_counter) {
+		generate_counter_incr(&invocation_counter_overflow, &profile_method, 
+				&profile_method_continue);
+		if (ProfileInterpreter) {
+			__ bind(profile_method_continue);
+		}
+	}
+
+	Label continue_after_compile;
+	__ bind(continue_after_compile);
+
+#endif // CORE
+
+	bang_stack_shadow_pages(false);
+
+	// reset the _do_not_unlock_if_synchronized flag
+#ifndef OPT_THREAD
+	__ get_thread(thread);
+#endif
+	__ sb(ZERO, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+
+	// check for synchronized methods
+	// Must happen AFTER invocation_counter check and stack overflow check,
+	// so the method is not locked if the counter overflows.
+	//
+	if (synchronized) {
+		// Allocate monitor and lock method
+		lock_method();
+	} else {
+		// no synchronization necessary
+#ifdef ASSERT
+		{ Label L;
+			__ lw(AT, T7, in_bytes(methodOopDesc::access_flags_offset()));
+			__ andi(T2, AT, JVM_ACC_SYNCHRONIZED);
+			__ beq(T2, ZERO, L);
+			__ delayed()->nop();
+			__ stop("method needs synchronization");
+			__ bind(L);
+		}
+#endif
+	}
+
+	// layout of frame after lock_method
+	// [ monitor entry            ] <--- sp
+	//   ...
+	// [ monitor entry            ]
+	// [ monitor block top        ] ( the top monitor entry )
+	// [ byte code pointer        ] (if native, bcp = 0)
+	// [ constant pool cache      ]
+	// [ methodOop                ]
+	// [ locals offset            ]
+	// [ sender's sp              ]
+	// [ sender's fp              ]
+	// [ return address           ] <--- fp
+	// [ local var m-1            ]
+	//   ...
+	// [ local var 0              ]
+	// [ argument word n-1        ] <--- ( sender's sp )
+	//   ...
+	// [ argument word 0          ] <--- S7
+
+
+	// start execution
+#ifdef ASSERT
+	{ Label L;
+		__ lw(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+		__ beq(AT, SP, L);
+		__ delayed()->nop();
+		__ stop("broken stack frame setup in interpreter in native");
+		__ bind(L);
+	}
+#endif
+
+	// jvmti/jvmpi support
+	__ notify_method_entry();
+	//jerome_for_debug 
+	//__ sub(S1,FP,SP);
+	//__ move(AT, (int)(&jerome7)); 
+	//__ sw(S1, AT, 0);  
+
+	__ dispatch_next(vtos);
+	//jerome_for_debug 
+	//__ move(AT, (int)(&jerome6)); 
+	//__ sw(FP, AT, 0);  
+
+#ifndef CORE
+	// invocation counter overflow
+	if (inc_counter) {
+		if (ProfileInterpreter) {
+			// We have decided to profile this method in the interpreter
+			__ bind(profile_method);
+
+			__ call_VM(noreg, CAST_FROM_FN_PTR(address, 
+						InterpreterRuntime::profile_method), T5, true);
+
+			__ lw(T7, FP, method_offset);
+			__ lw(FSR, T7, in_bytes(methodOopDesc::method_data_offset()));
+			__ sw(FSR, FP, frame::interpreter_frame_mdx_offset * wordSize);
+			__ test_method_data_pointer(FSR, profile_method_continue);
+			__ addiu(FSR, FSR, in_bytes(methodDataOopDesc::data_offset()));
+			__ sw(FSR, FP, frame::interpreter_frame_mdx_offset * wordSize);
+			__ b(profile_method_continue);
+			__ delayed()->nop();
+		}
+		// Handle overflow of counter and compile method
+		__ bind(invocation_counter_overflow);
+		generate_counter_overflow(&continue_after_compile); 
+	}
+
+#endif
+	return entry_point;
+}
+
+// Entry points
+//
+// Here we generate the various kind of entries into the interpreter.
+// The two main entry type are generic bytecode methods and native
+// call method.  These both come in synchronized and non-synchronized
+// versions but the frame layout they create is very similar. The
+// other method entry types are really just special purpose entries
+// that are really entry and interpretation all in one. These are for
+// trivial methods like accessor, empty, or special math methods.
+//
+// When control flow reaches any of the entry types for the interpreter
+// the following holds ->
+//
+// Arguments:
+//
+// T7: methodOop
+// V0: receiver
+//
+//
+// Stack layout immediately at entry
+//
+// [ parameter n-1      ] <--- sp
+//   ...
+// [ parameter 0        ]
+// [ expression stack   ] (caller's java expression stack)
+
+// Assuming that we don't go to one of the trivial specialized entries
+// the stack will look like below when we are ready to execute the
+// first bytecode (or call the native routine). The register usage
+// will be as the template based interpreter expects (see the MIPS
+// interpreter headers).
+//
+// local variables follow incoming parameters immediately; i.e.,
+// the return address is moved to the end of the locals.
+//
+// [ monitor entry            ] <--- sp
+//   ...
+// [ monitor entry            ]
+// [ monitor block top        ] ( the top monitor entry )
+// [ byte code pointer        ] (if native, bcp = 0)
+// [ constant pool cache      ]
+// [ methodOop                ]
+// [ locals offset            ]
+// [ sender's sp              ]
+// [ sender's fp              ]
+// [ return address           ] <--- fp
+// [ local var m-1            ]
+//   ...
+// [ local var 0              ]
+// [ argument word n-1        ] <--- ( sender's sp )
+//   ...
+// [ argument word 0          ] <--- S7
+
+address AbstractInterpreterGenerator::generate_method_entry(
+                                        AbstractInterpreter::MethodKind kind) {
+  // determine code generation flags
+  bool synchronized = false;
+  address entry_point = NULL;
+	switch (kind) {
+		case Interpreter::zerolocals             :                          break;
+		case Interpreter::zerolocals_synchronized: synchronized = true;     break;
+		case Interpreter::native                 :
+			entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false);
+			break;
+		case Interpreter::native_synchronized    :
+			entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true);
+			break;
+		case Interpreter::empty                  :
+			entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();
+			break;
+		case Interpreter::accessor               :
+			entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();
+			break;
+		case Interpreter::abstract               :
+			entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();
+			break;
+
+		case Interpreter::java_lang_math_sin     : // fall thru
+		case Interpreter::java_lang_math_cos     : // fall thru
+		case Interpreter::java_lang_math_tan     : // fall thru
+		case Interpreter::java_lang_math_abs     : // fall thru
+		case Interpreter::java_lang_math_log     : // fall thru
+		case Interpreter::java_lang_math_log10   : // fall thru
+		case Interpreter::java_lang_math_sqrt    :
+			// math entries are not generated here; entry_point stays NULL,
+			// so these kinds fall back to the normal entry generated below
+			// entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);
+			break;
+
+		default                                  : ShouldNotReachHere();    break;
+	}
+	if (entry_point) return entry_point;
+
+	return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized);
+}
+
+// How much stack a method activation needs in words.
+int AbstractInterpreter::size_top_interpreter_activation(methodOop method) {
+
+	const int entry_size    = frame::interpreter_frame_monitor_size();
+
+	// total overhead size: entry_size + (saved ebp thru expr stack bottom).
+	// be sure to change this if you add/subtract anything to/from the overhead area
+	const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size;
+
+	const int stub_code = 6;  // see generate_call_stub
+	// return overhead_size + method->max_locals() + method->max_stack() + stub_code;
+	const int method_stack = (method->max_locals() + method->max_stack()) *
+					Interpreter::stackElementWords();
+	return overhead_size + method_stack + stub_code;
+}
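+
+// A worked instance of the formula above (hypothetical numbers, just to
+// illustrate the units): for a method with max_locals() == 2 and
+// max_stack() == 3, and stackElementWords() == 1, the result is
+//   overhead_size + (2 + 3) * 1 + 6
+// words, where overhead_size covers one monitor entry plus the fixed frame
+// slots down to the initial expression stack bottom.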
+
+int AbstractInterpreter::layout_activation(methodOop method,
+                                           int tempcount,
+                                           int popframe_extra_args,
+                                           int moncount,
+                                           int callee_param_count,
+                                           int callee_locals,
+                                           frame* caller,
+                                           frame* interpreter_frame,
+                                           bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+  // If interpreter_frame!=NULL, set up the method, locals, and monitors.
+  // The frame interpreter_frame, if not NULL, is guaranteed to be the
+  // right size, as determined by a previous call to this method.
+  // It is also guaranteed to be walkable even though it is in a skeletal state
+
+  // fixed size of an interpreter frame:
+ // int max_locals = method->max_locals();
+  
+ int max_locals = method->max_locals() * Interpreter::stackElementWords();
+ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords();
+
+  int overhead = frame::sender_sp_offset - frame::interpreter_frame_initial_sp_offset;
+  // Our locals were accounted for by the caller (or last_frame_adjust on the transition)
+  // Since the callee parameters already account for the callee's params we only need to account for
+  // the extra locals.
+
+ // int size = overhead + callee_locals - callee_param_size + moncount*frame::interpreter_frame_monitor_size() + tempcount;
+ int size = overhead +
+	((callee_locals - callee_param_count)*Interpreter::stackElementWords()) +
+	 (moncount*frame::interpreter_frame_monitor_size()) +
+	 tempcount*Interpreter::stackElementWords() + popframe_extra_args;
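+
+ // A worked instance of the size computation above (hypothetical numbers):
+ // with callee_locals == callee_param_count, moncount == 1, tempcount == 2,
+ // popframe_extra_args == 0 and stackElementWords() == 1,
+ //   size == overhead + 1*interpreter_frame_monitor_size() + 2  (words)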
+  if (interpreter_frame != NULL) {
+#ifdef ASSERT
+    assert(caller->sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
+#endif
+
+    interpreter_frame->interpreter_frame_set_method(method);
+    // NOTE the difference in using sender_sp and interpreter_frame_sender_sp
+    // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
+    // and sender_sp is fp+8
+    jint* locals = interpreter_frame->sender_sp() + max_locals - 1;
+
+    interpreter_frame->interpreter_frame_set_locals(locals);
+    BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
+    BasicObjectLock* monbot = montop - moncount;
+    interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount);
+
+    // set last sp
+    intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords() -
+                    popframe_extra_args;
+    printf("last sp is %p\n", (void*) esp);
+    interpreter_frame->interpreter_frame_set_last_sp(esp);
+    // All frames but the initial interpreter frame we fill in have a
+    // value for sender_sp that allows walking the stack but isn't
+    // truly correct. Correct the value here.
+    // 
+   // int extra_locals = method->max_locals() - method->size_of_parameters();
+    if (extra_locals != 0 && 
+	interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) {
+      interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals);
+    }
+    *interpreter_frame->interpreter_frame_cache_addr() = 
+      method->constants()->cache();
+  }
+  return size;
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateInterpreterGenerator::generate_throw_exception() {
+  // Entry point in previous activation (i.e., if the caller was
+  // interpreted)
+  Interpreter::_rethrow_exception_entry = __ pc();
+
+ // Restore sp to interpreter_frame_last_sp even though we are going
+ // to empty the expression stack for the exception processing.
+// __ movl(Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
+   __ sw(ZERO,FP, frame::interpreter_frame_last_sp_offset * wordSize); 
+  
+  // V0: exception
+  // V1: return address/pc that threw exception
+  __ restore_bcp();                              // esi points to call/send
+  __ restore_locals();
+
+  // Entry point for exceptions thrown within interpreter code
+  Interpreter::_throw_exception_entry = __ pc();  
+  // expression stack is undefined here
+  // V0: exception
+  // BCP: exception bcp
+  __ verify_oop(V0);
+
+  // expression stack must be empty before entering the VM in case of an exception
+  __ empty_expression_stack();
+  // find exception handler address and preserve exception oop
+	__ move(A1, V0);
+  __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1);
+  // V0: exception handler entry point
+  // V1: preserved exception oop
+  // S0: bcp for exception handler
+	__ addi(SP, SP, (-1) * wordSize);
+	__ sw(V1, SP, 0);                              // push exception which is now the only value on the stack
+  __ jr(V0);                                   // jump to exception handler (may be _remove_activation_entry!)
+	__ delayed()->nop();
+
+  // If the exception is not handled in the current frame the frame is removed and
+  // the exception is rethrown (i.e. exception continuation is _rethrow_exception).
+  //
+  // Note: At this point the bci still refers to the instruction which caused
+  //       the exception and the expression stack is empty. Thus, for any VM calls
+  //       at this point, GC will find a legal oop map (with empty expression stack).
+
+  // In current activation
+  // V0: exception
+  // BCP: exception bcp
+
+  //
+  // JVMTI PopFrame support
+  //
+
+   Interpreter::_remove_activation_preserving_args_entry = __ pc();
+  __ empty_expression_stack();
+  // Set the popframe_processing bit in pending_popframe_condition indicating that we are
+  // currently handling popframe, so that call_VMs that may happen later do not trigger new
+  // popframe handling cycles.
+#ifndef OPT_THREAD
+	Register thread = T2;
+  __ get_thread(T2);
+#else
+	Register thread = TREG;
+#endif
+  __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset()));
+  __ ori(T3, T3, JavaThread::popframe_processing_bit);
+  __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset()));
+
+#ifndef CORE
+  {
+    // Check to see whether we are returning to a deoptimized frame.
+    // (The PopFrame call ensures that the caller of the popped frame is
+    // either interpreted or compiled and deoptimizes it if compiled.)
+    // In this case, we can't call dispatch_next() after the frame is
+    // popped, but instead must save the incoming arguments and restore
+    // them after deoptimization has occurred.
+    //
+    // Note that we don't compare the return PC against the
+    // deoptimization blob's unpack entry because of the presence of
+    // adapter frames in C2.
+    Label caller_not_deoptimized;
+		__ lw(A0, FP, frame::return_addr_offset * wordSize);
+		__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0);
+		__ bne(V0, ZERO, caller_not_deoptimized);
+		__ delayed()->nop();
+
+    // Compute size of arguments for saving when returning to deoptimized caller
+		__ get_method(A1);
+	        __ verify_oop(A1);	
+		__ lhu(A1, A1, in_bytes(methodOopDesc::size_of_parameters_offset()));
+		__ shl(A1, Interpreter::logStackElementSize());
+		__ restore_locals();
+		__ sub(A2, LVP, A1);
+		__ addiu(A2, A2, wordSize);
+    // Save these arguments
+#ifndef OPT_THREAD
+		__ get_thread(A0);
+#else
+		__ move(A0, TREG);
+#endif
+		__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2);
+
+		
+  		
+		__ remove_activation(vtos, T9, false, false, false);
+
+    // Inform deoptimization that it is responsible for restoring these arguments
+#ifndef OPT_THREAD
+		__ get_thread(thread);
+#endif
+		__ move(AT, JavaThread::popframe_force_deopt_reexecution_bit);
+		__ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset()));
+    // Continue in deoptimization handler
+    ///__ jmp(edx);
+		__ jr(T9);
+		__ delayed()->nop();
+
+    __ bind(caller_not_deoptimized);
+  }
+#endif /* !CORE */
+
+  
+  __ remove_activation(vtos, T3, 
+                       /* throw_monitor_exception */ false, 
+                       /* install_monitor_exception */ false,
+                       /* notify_jvmdi */ false);
+
+  // Finish with popframe handling:
+  // A previous I2C followed by a deoptimization might have moved the
+  // outgoing arguments further up the stack. PopFrame expects the
+  // mutations to those outgoing arguments to be preserved and other
+  // constraints basically require this frame to look exactly as
+  // though it had previously invoked an interpreted activation with
+  // no space between the top of the expression stack (current
+  // last_sp) and the top of stack. Rather than force deopt to
+  // maintain this kind of invariant all the time we call a small
+  // fixup routine to move the mutated arguments onto the top of our
+  // expression stack if necessary.
+  // why does x86 write this? I think it is of no use. @jerome
+  //__ movl(eax, esp);
+  //__ movl(ebx, Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize));
+    __ move(T8, SP);
+    __ lw(T6, FP, frame::interpreter_frame_last_sp_offset * wordSize);
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+// PC must point into interpreter here
+  //__ set_last_Java_frame(ecx, noreg, ebp, __ pc());
+  __ set_last_Java_frame(thread, noreg, FP, __ pc());
+ // __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), ecx, eax, ebx);
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, T6);
+  __ get_thread(thread);
+  __ reset_last_Java_frame(thread, true, true);
+  // Restore the last_sp and null it out
+  __ lw(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize);
+//  __ movl(Address(ebp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
+   __ sw(ZERO,FP, frame::interpreter_frame_last_sp_offset * wordSize);
+
+  
+  
+  __ move(AT, JavaThread::popframe_inactive);	
+  __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset()));
+
+  // Finish with popframe handling
+  __ restore_bcp();
+  __ restore_locals();
+#ifndef CORE
+  // The method data pointer was incremented already during
+  // call profiling. We have to restore the mdp for the current bcp.
+  if (ProfileInterpreter) {
+    __ set_method_data_pointer_for_bcp();
+  }
+#endif // !CORE
+    // Clear the popframe condition flag
+   // __ get_thread(ecx);
+   // __ movl(Address(ecx, JavaThread::popframe_condition_offset()), JavaThread::popframe_inactive);
+  
+    __ get_thread(thread);
+    __ move(AT,JavaThread::popframe_inactive); 
+   __ sw(AT,thread, in_bytes(JavaThread::popframe_condition_offset())); 
+   __ dispatch_next(vtos);
+  // end of PopFrame support
+
+  Interpreter::_remove_activation_entry = __ pc();
+  
+  // preserve exception over this code sequence
+	__ lw(T0, SP, 0);
+	__ addi(SP, SP, wordSize);
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+  __ sw(T0, thread, in_bytes(JavaThread::vm_result_offset()));
+  // remove the activation (without doing throws on illegalMonitorExceptions)
+   __ remove_activation(vtos, T3, false, true, false);
+  // restore exception
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+  __ lw(T0, thread, in_bytes(JavaThread::vm_result_offset()));
+  __ sw(ZERO, thread, in_bytes(JavaThread::vm_result_offset()));
+  __ verify_oop(T0);
+
+  // In between activations - previous activation type unknown yet
+  // compute continuation point - the continuation point expects
+  // the following registers set up:
+  //
+  // T0: exception                               (x86: eax)
+  // T3: return address/pc that threw exception  (x86: edx)
+  // SP: expression stack of caller              (x86: esp)
+  // FP: fp of caller                            (x86: ebp)
+	__ addi(SP, SP, (-2) * wordSize);
+	__ sw(T0, SP, wordSize);												// save exception
+	__ sw(T3, SP, 0);                               // save return address
+	__ move(A0, T3);	
+	__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), A0);
+  __ move(T7, V0);                             // save exception handler
+	__ lw(V0, SP, wordSize);												// restore exception
+	__ lw(V1, SP, 0);                               // restore return address
+	__ addi(SP, SP, 2 * wordSize);
+	
+  // Note that an "issuing PC" is actually the next PC after the call
+  __ jr(T7);                                   // jump to exception handler of caller
+	__ delayed()->nop();
+}
+
+
+//
+// JVMTI ForceEarlyReturn support
+//
+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
+  address entry = __ pc();
+//aoqi:FIXME ?
+ // __ restore_bcp();
+ // __ restore_locals();
+  __ empty_expression_stack();
+  __ empty_FPU_stack();
+  __ load_earlyret_value(state);
+
+  //__ get_thread(ecx);
+  __ get_thread(TREG);
+//  __ movl(TREG, Address(TREG, JavaThread::jvmti_thread_state_offset()));
+   __ lw(TREG, TREG, in_bytes(JavaThread::jvmti_thread_state_offset()));
+  //const Address cond_addr(ecx, JvmtiThreadState::earlyret_state_offset());
+  const Address cond_addr(TREG, in_bytes(JvmtiThreadState::earlyret_state_offset()));
+  // Clear the earlyret state
+ // __ movl(cond_addr, JvmtiThreadState::earlyret_inactive);
+    __ move(AT,JvmtiThreadState::earlyret_inactive);
+    __ sw(AT,cond_addr); 
+    //__ remove_activation(state, esi,
+
+ 
+
+    __ remove_activation(state, T0,
+		       false, /* throw_monitor_exception */
+                       false, /* install_monitor_exception */
+                       true); /* notify_jvmdi */
+ // __ jmp(esi);
+  //__ jmp(T0);
+    __ jr(T0); 
+    __ delayed()->nop(); 
+  return entry;
+} // end of ForceEarlyReturn support
+
+
+//-----------------------------------------------------------------------------
+// Helper for vtos entry point generation
+
+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
+                                                         address& bep,
+                                                         address& cep,
+                                                         address& sep,
+                                                         address& aep,
+                                                         address& iep,
+                                                         address& lep,
+                                                         address& fep,
+                                                         address& dep,
+                                                         address& vep) {
+  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
+  Label L;
+  fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop();
+  dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop();
+  lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop();
+  aep  =__ pc(); __ push(atos); __ b(L); __ delayed()->nop();
+  bep = cep = sep = iep = __ pc(); __ push(itos); 
+  vep = __ pc(); __ bind(L);    // fall through
+  generate_and_dispatch(t);
+}
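+
+// For example (illustrative, mirroring the code above): a caller that arrives
+// with a float in the tos cache enters at fep, which pushes ftos onto the
+// expression stack and branches to L; a caller already in the vtos state
+// enters directly at vep, which is bound to L. Either way
+// generate_and_dispatch(t) then runs with an empty tos cache.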
+
+
+//-----------------------------------------------------------------------------
+// Generation of individual instructions
+
+// helpers for generate_and_dispatch
+
+
+InterpreterGenerator::InterpreterGenerator(StubQueue* code)
+  : TemplateInterpreterGenerator(code) {
+   generate_all(); // down here so it can be "virtual"
+}
+
+//-----------------------------------------------------------------------------
+
+// Non-product code
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
+  address entry = __ pc();
+
+	// prepare expression stack
+	__ push(state);       // save tosca
+
+	// tos & tos2, added by yjl 7/15/2005
+	// trace_bytecode actually needs 4 args; the last two are tos & tos2.
+	// This works fine for x86, but the MIPS o32 calling convention stores A2-A3
+	// to the stack slots it thinks hold tos & tos2, so when the expression
+	// stack holds no more than 2 entries an error would occur.
+	__ lw(A2, SP, 0);
+	__ lw(A3, SP, 4);
+
+	// pass arguments & call tracer
+	__ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3);
+	__ move(RA, V0);    // make sure return address is not destroyed by pop(state)
+
+	// restore expression stack
+	__ pop(state);        // restore tosca
+
+	// return
+	__ jr(RA);
+	__ delayed()->nop();
+
+	return entry;
+}
+
+void TemplateInterpreterGenerator::count_bytecode() {
+	__ move(T8, (int)&BytecodeCounter::_counter_value);
+	__ lw(AT, T8, 0);
+	__ addi(AT, AT, 1);
+	__ sw(AT, T8, 0);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) {
+	__ move(T8, (int)&BytecodeHistogram::_counters[t->bytecode()]);
+	__ lw(AT, T8, 0);
+	__ addi(AT, AT, 1);
+	__ sw(AT, T8, 0);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) {
+	__ move(T8, (int)&BytecodePairHistogram::_index);
+	__ lw(T7, T8, 0);
+	__ srl(T7, T7, BytecodePairHistogram::log2_number_of_codes);
+	__ move(T8, ((int)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes);
+	__ orr(T7, T7, T8);
+	__ move(T8, (int)&BytecodePairHistogram::_index);
+	__ sw(T7, T8, 0);
+	__ sll(T7, T7, 2);
+	__ move(T8, (int)BytecodePairHistogram::_counters);
+	__ add(T8, T8, T7);
+	__ lw(AT, T8, 0);
+	__ addi(AT, AT, 1);
+	__ sw(AT, T8, 0);
+}
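+
+// The index computed above is, in effect (a sketch of the arithmetic):
+//   _index = (_index >> log2_number_of_codes)
+//          | (current_bytecode << log2_number_of_codes);
+//   _counters[_index]++;
+// so the low bits name the previous bytecode and the high bits the current
+// one, giving one counter per (previous, current) bytecode pair.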
+
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+  // Call a little run-time stub to avoid blow-up for each bytecode.
+  // The run-time stub saves the right registers, depending on
+  // the tosca in-state for the given template.
+
+	address entry = Interpreter::trace_code(t->tos_in());
+	assert(entry != NULL, "entry must have been generated");
+	__ call(entry, relocInfo::none);
+	__ delayed()->nop();
+}
+
+
+void TemplateInterpreterGenerator::stop_interpreter_at() {
+  Label L;
+	__ move(T8, int(&BytecodeCounter::_counter_value));
+	__ lw(T8, T8, 0);
+	__ move(AT, StopInterpreterAt);
+	__ bne(T8, AT, L);
+	__ delayed()->nop();
+	__ call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type);
+	__ delayed()->nop();
+	__ bind(L);
+}
+#endif // !PRODUCT
+#endif // ! CC_INTERP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+
+  protected:
+
+  // Size of interpreter code.  Increase if too small.  Interpreter will
+  // fail with a guarantee ("not enough space for interpreter generation")
+  // if too small.
+  // Run with +PrintInterpreter to get the VM to print out the size.
+  // Max size with JVMTI and TaggedStackInterpreter
+#ifdef _LP64
+  // The sethi() instruction generates lots more instructions when shell
+  // stack limit is unlimited, so that's why this is much bigger.
+  const static int InterpreterCodeSize = 210 * K;
+#else
+  const static int InterpreterCodeSize = 180 * K;
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/templateTable_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,4816 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_templateTable_mips.cpp.incl"
+
+#ifndef CC_INTERP
+
+#define __ _masm->
+
+// Platform-dependent initialization
+
+void TemplateTable::pd_initialize() {
+  // No mips specific initialization
+}
+
+// Address computation: local variables
+// we use LVP as the local variables pointer register, by yjl 6/27/2005
+static inline Address iaddress(int n) {
+  //return Address(r14, Interpreter::local_offset_in_bytes(n));
+	return Address(LVP, Interpreter::local_offset_in_bytes(n));
+}
+
+static inline Address laddress(int n) {
+  return iaddress(n + 1);
+}
+
+static inline Address faddress(int n) {
+  return iaddress(n);
+}
+
+static inline Address daddress(int n) {
+  return laddress(n);
+}
+
+static inline Address aaddress(int n) {
+  return iaddress(n);
+}
+static inline Address haddress(int n)            { return iaddress(n + 0); }
+
+//FIXME , can not use add and sll
+/*
+static inline Address iaddress(Register r) {
+  return Address(r14, r, Address::times_8, Interpreter::value_offset_in_bytes());
+}
+
+static inline Address laddress(Register r) {
+  return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
+}
+
+static inline Address faddress(Register r) {
+  return iaddress(r);
+}
+
+static inline Address daddress(Register r) {
+  return laddress(r);
+}
+
+static inline Address aaddress(Register r) {
+  return iaddress(r);
+}
+*/
+
+static inline Address at_sp()             { return Address(SP,  0); }
+static inline Address at_sp_p1()          { return Address(SP,  1 * wordSize); }
+static inline Address at_sp_p2()          { return Address(SP,  2 * wordSize); }
+
+// At top of Java expression stack which may be different than esp().  It
+// isn't for category 1 objects.
+static inline Address at_tos   () {
+  //return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
+	Address tos = Address(SP,  Interpreter::expr_offset_in_bytes(0));
+	return tos;
+}
+
+static inline Address at_tos_p1() {
+  //return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
+	return Address(SP,  Interpreter::expr_offset_in_bytes(1));
+}
+
+static inline Address at_tos_p2() {
+  //return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
+	return Address(SP,  Interpreter::expr_offset_in_bytes(2));
+}
+
+static inline Address at_tos_p3() {
+  //return Address(rsp,  Interpreter::expr_offset_in_bytes(3));
+	return Address(SP,  Interpreter::expr_offset_in_bytes(3));
+}
+/*
+// Condition conversion
+static Assembler::Condition j_not(TemplateTable::Condition cc) {
+  switch (cc) {
+  case TemplateTable::equal        : return Assembler::notEqual;
+  case TemplateTable::not_equal    : return Assembler::equal;
+  case TemplateTable::less         : return Assembler::greaterEqual;
+  case TemplateTable::less_equal   : return Assembler::greater;
+  case TemplateTable::greater      : return Assembler::lessEqual;
+  case TemplateTable::greater_equal: return Assembler::less;
+  }
+  ShouldNotReachHere();
+  return Assembler::zero;
+}
+*/
+
+// Miscellaneous helper routines
+// Store an oop (or NULL) at the address described by obj.
+// If val == noreg this means store a NULL
+/*
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address obj,
+                         Register val,
+                         BarrierSet::Name barrier,
+                         bool precise) {
+  assert(val == noreg || val == rax, "parameter is just for looks");
+  switch (barrier) {
+#ifndef SERIALGC
+    case BarrierSet::G1SATBCT:
+    case BarrierSet::G1SATBCTLogging:
+      {
+        // flatten object address if needed
+        if (obj.index() == noreg && obj.disp() == 0) {
+          if (obj.base() != rdx) {
+            __ movq(rdx, obj.base());
+          }
+        } else {
+          __ leaq(rdx, obj);
+        }
+        __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
+        if (val == noreg) {
+          __ store_heap_oop(Address(rdx, 0), NULL_WORD);
+        } else {
+          __ store_heap_oop(Address(rdx, 0), val);
+          __ g1_write_barrier_post(rdx, val, r8, rbx);
+        }
+
+      }
+      break;
+#endif // SERIALGC
+    case BarrierSet::CardTableModRef:
+    case BarrierSet::CardTableExtension:
+      {
+        if (val == noreg) {
+          __ store_heap_oop(obj, NULL_WORD);
+        } else {
+          __ store_heap_oop(obj, val);
+          // flatten object address if needed
+          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
+            __ store_check(obj.base());
+          } else {
+            __ leaq(rdx, obj);
+            __ store_check(rdx);
+          }
+        }
+      }
+      break;
+    case BarrierSet::ModRef:
+    case BarrierSet::Other:
+      if (val == noreg) {
+        __ store_heap_oop(obj, NULL_WORD);
+      } else {
+        __ store_heap_oop(obj, val);
+      }
+      break;
+    default      :
+      ShouldNotReachHere();
+
+  }
+}
+*/
+// we use S1 as bcp; be sure bcp is in S1 before you call any of the template generators
+Address TemplateTable::at_bcp(int offset) {
+  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
+  return Address(BCP, offset);
+}
+
+#define callee_saved_register(R) assert((R>=S0 && R<=S7), "should use callee saved registers!")
+
+// bytecode folding
+void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
+                                   Register scratch,
+                                   bool load_bc_into_scratch/*=true*/) {
+  if (!RewriteBytecodes) {
+    return;
+  }
+  // the pair bytecodes have already done the load.
+  if (load_bc_into_scratch) {
+    __ move(bc, bytecode);
+  }
+  Label patch_done;
+  if (JvmtiExport::can_post_breakpoint()) {
+    Label fast_patch;
+    // if a breakpoint is present we can't rewrite the stream directly
+		__ lbu(scratch, at_bcp(0));
+		__ move(AT, Bytecodes::_breakpoint);
+		__ bne(scratch, AT, fast_patch);
+		__ delayed()->nop();
+
+		__ get_method(scratch);
+		// Let breakpoint table handling rewrite to quicker bytecode 
+		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+				InterpreterRuntime::set_original_bytecode_at), scratch, BCP, bc);
+
+		__ b(patch_done);
+		__ delayed()->nop();
+		__ bind(fast_patch);
+	}
+
+#ifdef ASSERT
+	Label okay;
+	__ lbu(scratch, at_bcp(0));
+	__ move(AT, (int)Bytecodes::java_code(bytecode));
+	__ beq(scratch, AT, okay);
+	__ delayed()->nop();
+	__ beq(scratch, bc, patch_done);
+	__ delayed()->nop();
+	__ stop("patching the wrong bytecode");
+	__ bind(okay);
+#endif
+
+	// patch bytecode
+	__ sb(bc, at_bcp(0));
+	__ bind(patch_done);
+}
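+
+// Typical use (general template-interpreter behavior, not specific to this
+// change): the first execution of a rewritable bytecode such as getfield goes
+// through the slow, resolving path and then calls patch_bytecode with the
+// corresponding _fast_ variant, so later executions at the same bci dispatch
+// straight to the fast template. The breakpoint check above keeps a
+// _breakpoint opcode in place and routes the rewrite through the breakpoint
+// table instead of touching the bytecode stream directly.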
+
+
+// Individual instructions
+
+void TemplateTable::nop() {
+  transition(vtos, vtos);
+  // nothing to do
+}
+
+void TemplateTable::shouldnotreachhere() {
+  transition(vtos, vtos);
+  __ stop("shouldnotreachhere bytecode");
+}
+
+void TemplateTable::aconst_null() {
+  transition(vtos, atos);
+	__ move(FSR, ZERO);
+}
+
+void TemplateTable::iconst(int value) {
+  transition(vtos, itos);
+  if (value == 0) {
+    //__ xorl(rax, rax);
+		__ move(FSR, ZERO);
+  } else {
+    //__ movl(rax, value);
+		__ move(FSR, value);
+  }
+}
+
+void TemplateTable::lconst(int value) {
+  transition(vtos, ltos);
+  if (value == 0) {
+		__ move(FSR, ZERO);
+  } else {
+		__ move(FSR, value);
+  }
+	assert(value >= 0, "check this code");
+	__ move(SSR, ZERO);
+}
+
+const static float  _f0 = 0.0, _f1 = 1.0, _f2 = 2.0;
+
+const static double _d0 = 0.0, _d1 = 1.0;
+
+void TemplateTable::fconst(int value) {
+  transition(vtos, ftos);
+	if (value == 0) {
+		__ lui(AT, Assembler::split_high((int)&_f0));
+		__ lwc1(FSF, AT, Assembler::split_low((int)&_f0));
+	} else if (value == 1) {
+		__ lui(AT, Assembler::split_high((int)&_f1));
+		__ lwc1(FSF, AT, Assembler::split_low((int)&_f1));
+	} else if (value == 2) {
+		__ lui(AT, Assembler::split_high((int)&_f2));
+		__ lwc1(FSF, AT, Assembler::split_low((int)&_f2));
+	} else { 
+		ShouldNotReachHere();
+	}
+}
+
+void TemplateTable::dconst(int value) {
+  transition(vtos, dtos);
+	if (value == 0) { 
+		__ lui(AT, Assembler::split_high((int)&_d0));
+		__ lwc1(FSF, AT, Assembler::split_low((int)&_d0));
+		__ lwc1(SSF, AT, Assembler::split_low((int)&_d0)+4);
+	} else if (value == 1) {
+		__ lui(AT, Assembler::split_high((int)&_d1));
+		__ lwc1(FSF, AT, Assembler::split_low((int)&_d1));
+		__ lwc1(SSF, AT, Assembler::split_low((int)&_d1)+4);
+	} else { 
+		ShouldNotReachHere();
+	}
+}
+
+void TemplateTable::bipush() {
+	transition(vtos, itos);
+	__ lb(FSR, at_bcp(1));
+}
+
+void TemplateTable::sipush() {
+	transition(vtos, itos);
+	__ load_two_bytes_from_at_bcp(FSR, AT, 1);
+	__ hswap(FSR);
+}
+
+// used registers : T2, T3, T4
+// T2 : index
+// T3 : cpool
+// T4 : tag
+void TemplateTable::ldc(bool wide) {
+  transition(vtos, vtos);
+  Label call_ldc, notFloat, notClass, Done;
+	// get index in cpool
+  if (wide) {
+		__ load_two_bytes_from_at_bcp(T2, AT, 1);
+		__ huswap(T2);
+  } else {
+		__ lbu(T2, at_bcp(1));
+  }
+
+	__ get_cpool_and_tags(T3, T4);
+
+  const int base_offset = constantPoolOopDesc::header_size() * wordSize;
+  const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
+
+  // get type
+	__ add(AT, T4, T2);
+	__ lb(T4, AT, tags_offset);
+	//now T4 is the tag
+
+  // unresolved string - get the resolved string
+	__ addiu(AT, T4, - JVM_CONSTANT_UnresolvedString);
+	__ beq(AT, ZERO, call_ldc);
+	__ delayed()->nop();
+
+	// unresolved class - get the resolved class
+	__ addiu(AT, T4, - JVM_CONSTANT_UnresolvedClass);
+	__ beq(AT, ZERO, call_ldc);
+	__ delayed()->nop();
+	// unresolved class in error (resolution failed) - call into runtime
+	// so that the same error from first resolution attempt is thrown.
+	//  __ cmpl(edx, JVM_CONSTANT_UnresolvedClassInError);
+	__ addiu(AT, T4, -JVM_CONSTANT_UnresolvedClassInError); 
+	//	__ jccb(Assembler::equal, call_ldc);
+
+	__ beq(AT, ZERO, call_ldc);
+	__ delayed()->nop();
+
+	// resolved class - need to call vm to get java mirror of the class
+	__ addiu(AT, T4, - JVM_CONSTANT_Class);
+	__ bne(AT, ZERO, notClass);
+	__ delayed()->sll(T2, T2, 2);
+
+	__ bind(call_ldc);
+
+	__ move(A1, wide);
+	call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1);
+//	__ sw(FSR, SP, - 1 * wordSize);
+	__ push(atos);	
+	__ b(Done);
+//	__ delayed()->addi(SP, SP, - 1 * wordSize);
+	__ delayed()->nop();
+	__ bind(notClass);
+
+	__ addiu(AT, T4, -JVM_CONSTANT_Float);
+	__ bne(AT, ZERO, notFloat);
+	__ delayed()->nop();
+	// ftos
+	__ add(AT, T3, T2);
+	__ lwc1(FSF, AT, base_offset);
+	__ swc1(FSF, SP, - 1 * wordSize);
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - 1 * wordSize);
+
+	__ bind(notFloat);
+#ifdef ASSERT
+	{ 
+		Label L;
+		__ addiu(AT, T4, -JVM_CONSTANT_Integer);
+		__ beq(AT, ZERO, L);
+		__ delayed()->addiu(AT, T4, -JVM_CONSTANT_String);
+		__ beq(AT, ZERO, L);
+		__ delayed()->nop();
+		__ stop("unexpected tag type in ldc");
+		__ bind(L);
+	}
+#endif
+	// atos and itos
+	Label isOop;
+	__ add(AT, T3, T2);
+	__ lw(FSR, AT, base_offset);
+	// String is only oop type we will see here
+	__ addiu(AT, T4, -JVM_CONSTANT_String);
+	//__ bne(AT, ZERO, Done);
+	__ beq(AT,ZERO,isOop);	
+	__ delayed()->nop();
+	__ push(itos);
+	__ b(Done);
+	__ delayed()->nop(); 
+	__ bind(isOop);
+	__ push(atos);
+
+
+	if (VerifyOops) {
+		__ verify_oop(FSR);
+	}
+
+	__ bind(Done);
+}
+
+// used registers: T2, T3, T4
+// T2 : index
+// T3 : cpool
+// T4 : tag
+void TemplateTable::ldc2_w() {
+  transition(vtos, vtos);
+  Label Long, Done;
+
+	// get index in cpool
+	__ load_two_bytes_from_at_bcp(T2, AT, 1);
+	__ huswap(T2);
+
+	__ get_cpool_and_tags(T3, T4);
+
+	const int base_offset = constantPoolOopDesc::header_size() * wordSize;
+	const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
+
+	// get type in T4
+	__ add(AT, T4, T2);
+	__ lb(T4, AT, tags_offset);
+
+	__ addiu(AT, T4, - JVM_CONSTANT_Double);
+	__ bne(AT, ZERO, Long);
+	__ delayed()->sll(T2, T2, 2);
+	// dtos	
+	__ addu(AT, T3, T2);
+	__ lwc1(FSF, AT, base_offset + 0 * wordSize);
+	__ lwc1(SSF, AT, base_offset + 1 * wordSize);
+	__ swc1(FSF, SP, - 2*wordSize);
+	__ swc1(SSF, SP, - 1*wordSize);
+	__ b(Done);
+	__ delayed()->addi(SP, SP, -8);
+
+	// ltos
+	__ bind(Long);
+	__ add(AT, T3, T2);	
+	__ lw(FSR, AT, base_offset + 0 * wordSize);
+	__ lw(SSR, AT, base_offset + 1 * wordSize);
+	__ push(ltos);
+
+	__ bind(Done);
+}
+
+// We compute the actual local variable address here.
+// x86 does not need to, since it has scaled-index addressing; MIPS does not, so we compute it here.
+//FIXME
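+// In C terms the address computed below is (a sketch, assuming 4-byte stack
+// slots and locals growing downwards from LVP; local_addr is an illustrative name):
+//
+//   intptr_t* local_addr(char* LVP, int index) {
+//     return (intptr_t*)(LVP - 4 * index);
+//   }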
+void TemplateTable::locals_index(Register reg, int offset) {
+	__ lbu(reg, at_bcp(offset));
+	__ sll(reg, reg, 2);
+	__ sub(reg, LVP, reg);
+}
+
+// This method does bytecode folding of the two forms:
+// iload iload			iload caload
+// used register : T2, T3
+// T2 : bytecode
+// T3 : folded code
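+// The rewrite decision, as a sketch (next = bytecode following this iload):
+//
+//   next == _iload       ->  leave as is (wait for the last iload of the pair)
+//   next == _fast_iload  ->  rewrite this iload to _fast_iload2
+//   next == _caload      ->  rewrite this iload to _fast_icaload
+//   otherwise            ->  rewrite this iload to _fast_iload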
+void TemplateTable::iload() {
+	transition(vtos, itos);
+	if (RewriteFrequentPairs) { 
+		Label rewrite, done;
+		// get the next bytecode in T2
+		__ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
+		// if _iload, wait to rewrite to iload2.  We only want to rewrite the
+		// last two iloads in a pair.  Comparing against fast_iload means that
+		// the next bytecode is neither an iload nor a caload, and therefore
+		// an iload pair.
+		__ move(AT, Bytecodes::_iload);
+		__ beq(AT, T2, done);
+		__ delayed()->nop();
+
+		__ move(AT, Bytecodes::_fast_iload);
+		__ beq(AT, T2, rewrite);
+		__ delayed();
+		__ move(T3, Bytecodes::_fast_iload2);
+
+		// if _caload, rewrite to fast_icaload
+		__ move(AT, Bytecodes::_caload);
+		__ beq(AT, T2, rewrite);
+		__ delayed();
+		__ move(T3, Bytecodes::_fast_icaload);
+
+		// rewrite so iload doesn't check again.
+		__ move(T3, Bytecodes::_fast_iload);
+
+		// rewrite
+		// T3 : fast bytecode
+		__ bind(rewrite);
+		patch_bytecode(Bytecodes::_iload, T3, T2, false);
+		__ bind(done);
+	}
+
+	// Get the local value into tos
+	locals_index(T2);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::fast_iload2() {
+	transition(vtos, itos);
+	locals_index(T2);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+	__ push(itos);
+	locals_index(T2, 3);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+  
+// used register T2
+// T2 : index
+void TemplateTable::fast_iload() {
+	transition(vtos, itos);
+	locals_index(T2);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::lload() {
+
+	transition(vtos, ltos);
+	locals_index(T2);
+	__ lw(FSR, T2, -4);
+	__ lw(SSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::fload() {
+	transition(vtos, ftos);
+	locals_index(T2);
+	__ lwc1(FSF, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::dload() {
+
+	transition(vtos, dtos);
+	locals_index(T2);
+	if (TaggedStackInterpreter) {
+		// Get double out of locals array, onto temp stack and load with
+		// float instruction into ST0
+		//    __ movl(eax, laddress(ebx));
+		__ sll(AT,T2,Interpreter::stackElementScale());
+		__ add(AT, LVP, AT);
+		__ lwc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); 
+		// __ movl(edx, haddress(ebx));
+		__ lwc1(SSF, AT, Interpreter::local_offset_in_bytes(0)); 
+
+		//   __ pushl(edx);  // push hi first
+		// __ pushl(eax);
+		//    __ fld_d(Address(esp));
+		//   __ addl(esp, 2*wordSize);
+		debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+	} else {
+		__ lwc1(FSF, T2, -4);
+		__ lwc1(SSF, T2, 0);
+	}
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::aload() 
+{
+	transition(vtos, atos);
+
+	locals_index(T2);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+void TemplateTable::locals_index_wide(Register reg) {
+	__ load_two_bytes_from_at_bcp(reg, AT, 2);
+	__ huswap(reg);
+	__ sll(reg, reg, 2);
+	__ sub(reg, LVP, reg);
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::wide_iload() {
+	transition(vtos, itos);
+	locals_index_wide(T2);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::wide_lload() {
+	transition(vtos, ltos);
+	locals_index_wide(T2);
+	__ lw(FSR, T2, -4);
+	__ lw(SSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::wide_fload() {
+	transition(vtos, ftos);
+	locals_index_wide(T2);
+	__ lwc1(FSF, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::wide_dload() {
+	transition(vtos, dtos);
+	locals_index_wide(T2);
+	if (TaggedStackInterpreter) {
+		// Get double out of locals array, onto temp stack and load with
+		// float instruction into ST0
+		//   __ movl(eax, laddress(ebx));
+		//  __ movl(edx, haddress(ebx));
+		__ sll(AT,T2,Interpreter::stackElementScale());
+		__ add(AT, LVP, AT);
+		__ lwc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); 
+		// __ movl(edx, haddress(ebx));
+		__ lwc1(SSF, AT, Interpreter::local_offset_in_bytes(0)); 
+
+		//  __ pushl(edx);  // push hi first
+		//  __ pushl(eax);
+		//  __ fld_d(Address(esp));
+		//  __ addl(esp, 2*wordSize);
+		debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+	} else {
+		__ lwc1(FSF, T2, -4);
+		__ lwc1(SSF, T2, 0);
+	}
+}
+
+// used register T2
+// T2 : index
+void TemplateTable::wide_aload() {
+	transition(vtos, atos);
+	locals_index_wide(T2);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+// We use A2 as the register for the index, BE CAREFUL!
+// We do not use the tgeu trap (trap code 29) in the default build; that is left for later optimization.
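+// The check performed below, as a sketch (the unsigned compare folds a
+// negative index into the too-large case):
+//
+//   if (array == NULL) throw NullPointerException;            // via null_check
+//   if ((unsigned)index >= (unsigned)array->length())
+//     throw ArrayIndexOutOfBoundsException(index);            // index expected in A2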
+void TemplateTable::index_check(Register array, Register index) {
+	// Pop ptr into array
+	__ pop_ptr(array);
+	index_check_without_pop(array, index);
+}
+
+void TemplateTable::index_check_without_pop(Register array, Register index) {
+	// destroys ebx
+	// check array
+	__ null_check(array, arrayOopDesc::length_offset_in_bytes());
+
+	// check index
+	Label ok;
+	__ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
+#ifndef OPT_RANGECHECK
+	__ sltu(AT, index, AT);
+	__ bne(AT, ZERO, ok);
+	__ delayed()->nop(); 
+
+	//throw_ArrayIndexOutOfBoundsException assumes the aberrant index is in A2
+	if (A2!=index) __ move(A2, index);		
+	__ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+	__ delayed()->nop();
+	__ bind(ok);
+#else
+	__ lw(AT, array, arrayOopDesc::length_offset_in_bytes());
+	__ move(A2, index);
+	__ tgeu(A2, AT, 29);
+#endif
+}
+
+void TemplateTable::iaload() {
+	transition(itos, itos);
+	//  __ pop(SSR);
+	index_check(SSR, FSR);
+	__ shl(FSR, 2);
+	__ add(FSR, SSR, FSR);
+	//FSR: index
+	__ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT));
+
+}
+
+
+void TemplateTable::laload() {
+	transition(itos, ltos);
+	//  __ pop(SSR);
+	index_check(SSR, FSR); 
+	__ sll(AT, FSR, 3);
+	__ add(AT, SSR, AT);
+	__ lw(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
+	__ lw(SSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize);
+}
+
+void TemplateTable::faload() {
+	transition(itos, ftos);
+	// __ pop(SSR);
+	index_check(SSR, FSR);  
+	__ shl(FSR, 2);
+	__ add(FSR, SSR, FSR);
+	__ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
+}
+
+void TemplateTable::daload() {
+	transition(itos, dtos);
+	//__ pop(SSR);
+	index_check(SSR, FSR);  
+	__ sll(AT, FSR, 3);
+	__ add(AT, SSR, AT);
+	__ lwc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
+	__ lwc1(SSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 1 * wordSize);
+}
+
+void TemplateTable::aaload() {
+	transition(itos, atos);
+	//__ pop(SSR);
+	index_check(SSR, FSR);
+	__ shl(FSR, 2);
+	__ add(FSR, SSR, FSR);
+	__ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+}
+
+void TemplateTable::baload() {
+	transition(itos, itos);
+	//__ pop(SSR);
+	index_check(SSR, FSR); 
+	__ add(FSR, SSR, FSR);
+	__ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
+}
+
+void TemplateTable::caload() {
+	transition(itos, itos);
+	// __ pop(SSR);
+	index_check(SSR, FSR);
+	__ shl(FSR, 1);
+	__ add(FSR, SSR, FSR);
+	__ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
+}
+
+// iload followed by caload frequent pair
+// used register : T2
+// T2 : index
+void TemplateTable::fast_icaload() {
+	transition(vtos, itos);
+	// load index out of locals
+	locals_index(T2);
+	__ lw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+//	__ pop(SSR);
+	index_check(SSR, FSR);
+	__ shl(FSR, 1);
+	__ add(FSR, SSR, FSR);
+	__ lhu(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_CHAR));
+}
+
+void TemplateTable::saload() {
+	transition(itos, itos);
+	// __ pop(SSR);
+	index_check(SSR, FSR);  
+	__ shl(FSR, 1);
+	__ add(FSR, SSR, FSR);
+	__ lh(FSR, FSR,  arrayOopDesc::base_offset_in_bytes(T_SHORT));
+}
+
+void TemplateTable::iload(int n) {
+	transition(vtos, itos);
+	__ lw(FSR, iaddress(n));
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+
+void TemplateTable::lload(int n) {
+	transition(vtos, ltos);
+	__ lw(FSR, laddress(n));
+	__ lw(SSR, haddress(n));
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+
+void TemplateTable::fload(int n) {
+	transition(vtos, ftos);
+	__ lwc1(FSF, faddress(n));
+	debug_only(__ verify_local_tag(frame::TagValue, T2));
+}
+//FIXME here
+void TemplateTable::dload(int n) {
+	transition(vtos, dtos);
+	if (TaggedStackInterpreter) {
+		// Get double out of locals array, onto temp stack and load with
+		// float instruction into ST0
+		//__ movl(eax, laddress(n));
+		//__ movl(edx, haddress(n));
+		//__ pushl(edx);  // push hi first
+		//__ pushl(eax);
+		//  __ fld_d(Address(esp));
+		// __ addl(esp, 2*wordSize);  // reset esp
+		__ lwc1(FSF, laddress(n));
+		__ lwc1(SSF, haddress(n));
+		debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+	} else {
+		__ lwc1(FSF, laddress(n));
+		__ lwc1(SSF, haddress(n));
+	}
+}
+
+void TemplateTable::aload(int n) {
+	transition(vtos, atos);
+	__ lw(FSR, aaddress(n));
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+// used register : T2, T3
+// T2 : bytecode
+// T3 : folded code
+void TemplateTable::aload_0() {
+	transition(vtos, atos);
+	// According to bytecode histograms, the pairs:
+	//
+	// _aload_0, _fast_igetfield
+	// _aload_0, _fast_agetfield
+	// _aload_0, _fast_fgetfield
+	//
+	// occur frequently. If RewriteFrequentPairs is set, the (slow) _aload_0
+	// bytecode checks if the next bytecode is either _fast_igetfield, 
+	// _fast_agetfield or _fast_fgetfield and then rewrites the
+	// current bytecode into a pair bytecode; otherwise it rewrites the current
+	// bytecode into _fast_aload_0 that doesn't do the pair check anymore.
+	//
+	// Note: If the next bytecode is _getfield, the rewrite must be delayed,
+	//       otherwise we may miss an opportunity for a pair.
+	//
+	// Also rewrite frequent pairs
+	//   aload_0, aload_1
+	//   aload_0, iload_1
+	// These bytecodes with a small amount of code are most profitable to rewrite
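+	// Rewrite decision, as a sketch (next = bytecode following this aload_0):
+	//
+	//   next == _getfield        ->  leave as is (wait for the rewritten fast form)
+	//   next == _fast_igetfield  ->  rewrite to _fast_iaccess_0
+	//   next == _fast_agetfield  ->  rewrite to _fast_aaccess_0
+	//   next == _fast_fgetfield  ->  rewrite to _fast_faccess_0
+	//   otherwise                ->  rewrite to _fast_aload_0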
+	if (RewriteFrequentPairs) {
+		Label rewrite, done;
+		// get the next bytecode in T2
+		__ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
+
+		// do actual aload_0
+		aload(0);
+
+		// if _getfield then wait with rewrite
+		__ move(AT, Bytecodes::_getfield);
+		__ beq(AT, T2, done);
+		__ delayed()->nop();
+
+		// if _igetfield then rewrite to _fast_iaccess_0
+		assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == 
+				Bytecodes::_aload_0, "fix bytecode definition");
+		__ move(AT, Bytecodes::_fast_igetfield);
+		__ beq(AT, T2, rewrite);
+		__ delayed();
+		__ move(T3, Bytecodes::_fast_iaccess_0);
+
+		// if _agetfield then rewrite to _fast_aaccess_0
+		assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == 
+				Bytecodes::_aload_0, "fix bytecode definition");
+		__ move(AT, Bytecodes::_fast_agetfield);
+		__ beq(AT, T2, rewrite);
+		__ delayed();
+		__ move(T3, Bytecodes::_fast_aaccess_0);
+
+		// if _fgetfield then rewrite to _fast_faccess_0
+		assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == 
+				Bytecodes::_aload_0, "fix bytecode definition");
+		__ move(AT, Bytecodes::_fast_fgetfield);
+		__ beq(AT, T2, rewrite);
+		__ delayed();
+		__ move(T3, Bytecodes::_fast_faccess_0);
+
+		// else rewrite to _fast_aload_0
+		assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == 
+				Bytecodes::_aload_0, "fix bytecode definition");
+		__ move(T3, Bytecodes::_fast_aload_0);
+
+		// rewrite
+		__ bind(rewrite);
+		patch_bytecode(Bytecodes::_aload_0, T3, T2, false);
+
+		__ bind(done);
+	} else {
+		aload(0);
+	}
+}
+
+void TemplateTable::istore() {
+	transition(itos, vtos);
+	locals_index(T2);
+	__ sw(FSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+void TemplateTable::lstore() {
+	transition(ltos, vtos);
+	locals_index(T2);
+	__ sw(FSR, T2, -4);
+	__ sw(SSR, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+void TemplateTable::fstore() {
+	transition(ftos, vtos);
+	locals_index(T2);
+	__ swc1(FSF, T2, 0);
+	debug_only(__ verify_local_tag(frame::TagCategory2, T2));
+}
+
+void TemplateTable::dstore() {
+	transition(dtos, vtos);
+	locals_index(T2);
+	if (TaggedStackInterpreter) {
+		// Store double on stack and reload into locals nonadjacently
+		//    __ subl(esp, 2 * wordSize);
+		//   __ fstp_d(Address(esp));
+		//  __ popl(eax);
+		// __ popl(edx);
+		//__ movl(laddress(ebx), eax);
+		//__ movl(haddress(ebx), edx);
+		// __ swc1(FSF, laddress(T2));
+		//__ swc1(SSF,  haddress(T2));
+		__ sll(AT,T2,Interpreter::stackElementScale());
+		__ add(AT, LVP, AT);
+		__ lwc1(FSF, AT, Interpreter::local_offset_in_bytes(1)); 
+		// __ movl(edx, haddress(ebx));
+		__ lwc1(SSF, AT, Interpreter::local_offset_in_bytes(0)); 
+
+
+		__ tag_local(frame::TagCategory2, T2);
+	} else {
+		__ swc1(FSF, T2, -4);
+		__ swc1(SSF, T2, 0);
+	}
+}
+
+void TemplateTable::astore() {
+	transition(vtos, vtos);
+	//  __ pop(FSR);
+	__ pop_ptr(FSR, SSR);
+	locals_index(T2);
+	__ sw(FSR, T2, 0);
+	__ tag_local(SSR, T2);    // store the same tag in the local; it may be a returnAddress
+
+}
+
+void TemplateTable::wide_istore() {
+	transition(vtos, vtos);
+	//  __ pop(FSR);
+	__ pop_i(FSR);
+	locals_index_wide(T2);
+	__ sw(FSR, T2, 0);
+	__ tag_local(frame::TagValue, T2);
+}
+
+void TemplateTable::wide_lstore() {
+	transition(vtos, vtos);
+	//__ pop2(FSR, SSR);
+	//__ pop_l(FSR, SSR); 
+	__ pop_l(FSR); //aoqi:FIXME Is this right?
+	locals_index_wide(T2);
+	__ sw(FSR, T2, -4);
+	__ sw(SSR, T2, 0);
+	__ tag_local(frame::TagCategory2, T2);
+}
+
+void TemplateTable::wide_fstore() {
+	wide_istore();
+}
+
+void TemplateTable::wide_dstore() {
+	wide_lstore();
+}
+
+void TemplateTable::wide_astore() {
+	//  wide_istore();
+	transition(vtos, vtos);
+	//  __ pop_ptr(eax, edx);
+	__ pop_ptr(FSR, SSR);
+	// locals_index_wide(ebx);
+	locals_index_wide(T2);
+	//__ movl(aaddress(ebx), eax);
+	//  __ sw(FSR, aaddress(T2)); 
+	__ sll(AT,T2,Interpreter::stackElementScale());
+	__ add(AT, LVP, AT);
+	__ addi(AT, AT, Interpreter::value_offset_in_bytes());
+	__ tag_local(SSR,AT );
+
+}
+
+// used register : T2
+void TemplateTable::iastore() {
+	transition(itos, vtos);
+	/* 
+	   __ pop2(SSR, T2);
+	   index_check(T2, SSR);
+	   __ shl(SSR, 2);
+	   __ add(T2, T2, SSR);
+	   __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT));
+	   */
+	// __ pop_i(ebx);
+	__ pop_i(SSR);
+	index_check(T2, SSR);  // index in SSR
+	__ shl(SSR, Address::times_4);
+	__ add(T2, T2, SSR);
+	__ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT));
+}
+
+
+
+// used registers T2, T3
+void TemplateTable::lastore() {
+	transition(ltos, vtos);
+	//	__ pop2(T2, T3);
+	__ pop_i (T2); 
+	index_check(T3, T2);
+	__ shl(T2, 3);
+	__ add(T3, T3, T2);
+	__ sw(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize);
+	__ sw(SSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize);
+}
+
+// used register T2
+void TemplateTable::fastore() {
+	transition(ftos, vtos);
+	//__ pop2(SSR, T2);
+	__ pop_i(SSR);
+	index_check(T2, SSR); 
+	__ shl(SSR, 2);
+	__ add(T2, T2, SSR);
+	__ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT));
+}
+
+// used registers T2, T3
+void TemplateTable::dastore() {
+	transition(dtos, vtos);
+	//__ pop2(T2, T3);
+	__ pop_i (T2); 
+	index_check(T3, T2);  
+	__ shl(T2, Address::times_8);
+	__ addu(T3, T3, T2);
+	__ swc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize);
+	__ swc1(SSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 1 * wordSize);
+
+}
+
+// used registers : T2, T3, T4
+// T2 : array
+// T3 : subklass
+// T4 : supklass
+void TemplateTable::aastore() {
+	Label is_null, ok_is_subtype, done;
+	transition(vtos, vtos);
+	// stack: ..., array, index, value
+	//  __ lw(FSR, at_sp());     // Value
+	//  __ lw(SSR, at_sp_p1());  // Index
+	//  __ lw(T2, at_sp_p2());  // Array
+	__ lw(FSR, at_tos());     // Value
+	__ lw(SSR, at_tos_p1());  // Index
+	__ lw(T2, at_tos_p2());  // Array
+
+	// index_check(T2, SSR);
+	index_check_without_pop(T2, SSR);
+	// do array store check - check for NULL value first
+	__ beq(FSR, ZERO, is_null);
+	__ delayed()->nop();
+	__ profile_checkcast(false, T3); // Blows T3
+
+	// Move subklass into T3
+	__ lw(T3,  Address(FSR, oopDesc::klass_offset_in_bytes()));
+	// Move superklass into T4
+	__ lw(T4, Address(T2, oopDesc::klass_offset_in_bytes()));
+	__ lw(T4, Address(T4, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
+	// Compress array+index*4+12 into a single register. T2
+	__ sll(AT, SSR, 2);
+	__ add(T2, T2, AT);
+	__ addi(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+	// Generate subtype check.
+	// Superklass in T4.  Subklass in T3.
+	__ gen_subtype_check(T4, T3, ok_is_subtype);
+	// Come here on failure
+	// object is at FSR
+	__ jmp(Interpreter::_throw_ArrayStoreException_entry);
+	__ delayed()->nop();
+	// Come here on success
+	__ bind(ok_is_subtype);
+	__ sw(FSR, T2, 0);
+	__ store_check(T2);
+	__ b(done);
+	__ delayed()->nop();
+
+	// Have a NULL in FSR, EDX=T2, SSR=index.  Store NULL at ary[idx]
+	__ bind(is_null);
+	__ profile_checkcast(true, T3);	//blows T3
+	__ sll(AT, SSR, 2);
+	__ add(T2, T2, AT);
+	__ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+
+	__ bind(done);
+	__ addi(SP, SP, 3 * Interpreter::stackElementSize());
+
+}
+
+void TemplateTable::bastore() {
+	transition(itos, vtos);
+	//__ pop2(SSR, T2);
+	__ pop_i (SSR); 
+	index_check(T2, SSR);
+	__ add(SSR, T2, SSR);
+	__ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE));
+}
+
+void TemplateTable::castore() {
+	transition(itos, vtos);
+	//__ pop2(SSR, T2);
+	__ pop_i(SSR); 
+	index_check(T2, SSR); 
+	__ shl(SSR, 1);
+	__ add(SSR, T2, SSR);
+	__ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR));
+}
+
+void TemplateTable::sastore() {
+	castore();
+}
+
+void TemplateTable::istore(int n) {
+	transition(itos, vtos);
+	__ sw(FSR, iaddress(n));
+	__ tag_local(frame::TagValue, n);
+}
+
+void TemplateTable::lstore(int n) {
+	transition(ltos, vtos);
+	__ sw(FSR, laddress(n));
+	__ sw(SSR, haddress(n));
+	__ tag_local(frame::TagCategory2, n);
+}
+
+void TemplateTable::fstore(int n) {
+	transition(ftos, vtos);
+	__ swc1(FSF, faddress(n));
+	__ tag_local(frame::TagValue, n);
+}
+//FIXME,
+void TemplateTable::dstore(int n) {
+	transition(dtos, vtos);
+	if (TaggedStackInterpreter) {
+		/*  __ subl(esp, 2 * wordSize);
+		    __ fstp_d(Address(esp));
+		    __ popl(eax);
+		    __ popl(edx);
+		    __ movl(laddress(n), eax);
+		    __ movl(haddress(n), edx);
+		    */ 
+		__ swc1(FSF, laddress(n));
+		__ swc1(SSF, haddress(n));
+		__ tag_local(frame::TagCategory2, n);
+	} else {
+		__ swc1(FSF, laddress(n));
+		__ swc1(SSF, haddress(n));
+	}
+}
+
+void TemplateTable::astore(int n) {
+	transition(vtos, vtos);
+	//__ pop(FSR);
+	__ pop_ptr(FSR, SSR);
+	__ sw(FSR, aaddress(n));
+	__ tag_local(SSR, n);
+}
+
+void TemplateTable::pop() {
+	transition(vtos, vtos);
+	//  __ pop();
+	__ addi(SP, SP, Interpreter::stackElementSize());
+}
+
+void TemplateTable::pop2() {
+	transition(vtos, vtos);
+	//__ pop2();
+	__ addi(SP, SP, 2*Interpreter::stackElementSize());
+}
+
+void TemplateTable::dup() {
+	transition(vtos, vtos);
+	// stack: ..., a
+	//	__ lw(AT, SP, 0);
+	//	__ push(AT);
+	__ load_ptr_and_tag(0, FSR, SSR);
+	__ push_ptr(FSR, SSR);
+	// stack: ..., a, a
+}
+
+// blows FSR
+void TemplateTable::dup_x1() {
+	transition(vtos, vtos);
+	// stack: ..., a, b
+	__ load_ptr_and_tag(0, FSR, SSR);  // load b
+	__ load_ptr_and_tag(1, T5, T4);  // load a
+	__ store_ptr_and_tag(1, FSR, SSR); // store b
+	__ store_ptr_and_tag(0, T5, T4); // store a
+	__ push_ptr(FSR, SSR);             // push b
+	// stack: ..., b, a, b
+}
+
+// blows FSR
+void TemplateTable::dup_x2() {
+	transition(vtos, vtos);
+	// stack: ..., a, b, c
+	__ load_ptr_and_tag(0, FSR, SSR);  // load c
+	__ load_ptr_and_tag(2, T5, T4);  // load a
+	__ store_ptr_and_tag(2, FSR, SSR); // store c in a
+	__ push_ptr(FSR, SSR);             // push c
+	// stack: ..., c, b, c, c
+	__ load_ptr_and_tag(2, FSR, SSR);  // load b
+	__ store_ptr_and_tag(2, T5, T4); // store a in b
+	// stack: ..., c, a, c, c
+	__ store_ptr_and_tag(1, FSR, SSR); // store b in c
+	// stack: ..., c, a, b, c
+}
+
+// blows FSR
+void TemplateTable::dup2() {
+	transition(vtos, vtos);
+	// stack: ..., a, b
+	__ load_ptr_and_tag(1, FSR, SSR);  // load a
+	__ push_ptr(FSR, SSR);             // push a
+	__ load_ptr_and_tag(1, FSR, SSR);  // load b
+	__ push_ptr(FSR, SSR);             // push b
+	// stack: ..., a, b, a, b
+}
+
+// blows FSR
+void TemplateTable::dup2_x1() {
+	transition(vtos, vtos);
+	// stack: ..., a, b, c
+	__ load_ptr_and_tag(0, T5, T4);  // load c
+	__ load_ptr_and_tag(1, FSR, SSR);  // load b
+	__ push_ptr(FSR, SSR);             // push b
+	__ push_ptr(T5, T4);             // push c
+	// stack: ..., a, b, c, b, c
+	__ store_ptr_and_tag(3, T5, T4); // store c in b
+	// stack: ..., a, c, c, b, c
+	__ load_ptr_and_tag(4, T5, T4);  // load a
+	__ store_ptr_and_tag(2, T5, T4); // store a in 2nd c
+	// stack: ..., a, c, a, b, c
+	__ store_ptr_and_tag(4, FSR, SSR); // store b in a
+	// stack: ..., b, c, a, b, c
+
+	// stack: ..., b, c, a, b, c
+}
+
+// blows FSR, SSR
+void TemplateTable::dup2_x2() {
+	transition(vtos, vtos);
+	// stack: ..., a, b, c, d
+	// stack: ..., a, b, c, d
+	__ load_ptr_and_tag(0, T5, T4);  // load d
+	__ load_ptr_and_tag(1, FSR, SSR);  // load c
+	__ push_ptr(FSR, SSR);             // push c
+	__ push_ptr(T5, T4);             // push d
+	// stack: ..., a, b, c, d, c, d
+	__ load_ptr_and_tag(4, FSR, SSR);  // load b
+	__ store_ptr_and_tag(2, FSR, SSR); // store b in d
+	__ store_ptr_and_tag(4, T5, T4); // store d in b
+	// stack: ..., a, d, c, b, c, d
+	__ load_ptr_and_tag(5, T5, T4);  // load a
+	__ load_ptr_and_tag(3, FSR, SSR);  // load c
+	__ store_ptr_and_tag(3, T5, T4); // store a in c
+	__ store_ptr_and_tag(5, FSR, SSR); // store c in a
+	// stack: ..., c, d, a, b, c, d
+
+	// stack: ..., c, d, a, b, c, d
+}
+
+// blows FSR
+void TemplateTable::swap() {
+	transition(vtos, vtos);
+	// stack: ..., a, b
+
+	__ load_ptr_and_tag(1, T5, T4);  // load a
+	__ load_ptr_and_tag(0, FSR, SSR);  // load b
+	__ store_ptr_and_tag(0, T5, T4); // store a in b
+	__ store_ptr_and_tag(1, FSR, SSR); // store b in a
+
+	// stack: ..., b, a
+}
+
+void TemplateTable::iop2(Operation op) {
+	transition(itos, itos);
+	switch (op) {
+		case add  :                    
+			__ pop_i(SSR); 
+			__ addu(FSR, SSR, FSR); 
+			break;
+		case sub  :  
+			__ pop_i(SSR); 
+			__ subu(FSR, SSR, FSR); 
+			break;
+		case mul  :                    
+			__ lw(SSR, SP, 0);
+			__ mult(SSR, FSR);
+			__ addi(SP, SP, wordSize);
+			__ nop();
+			__ mflo(FSR);
+			break;
+		case _and :                    
+			__ pop_i(SSR); 
+			__ andr(FSR, SSR, FSR); 
+			break;
+		case _or  :                    
+			__ pop_i(SSR); 
+			__ orr(FSR, SSR, FSR); 
+			break;
+		case _xor :                    
+			__ pop_i(SSR); 
+			__ xorr(FSR, SSR, FSR); 
+			break;
+		case shl  : 
+			__ pop_i(SSR); 
+			__ sllv(FSR, SSR, FSR);      
+			break; // lower 5 bits are implicitly masked by the shift instruction on x86 and MIPS alike
+		case shr  : 
+			__ pop_i(SSR); 
+			__ srav(FSR, SSR, FSR);      
+			break; // lower 5 bits are implicitly masked by the shift instruction on x86 and MIPS alike
+		case ushr : 
+			__ pop_i(SSR); 
+			__ srlv(FSR, SSR, FSR);     
+			break; // lower 5 bits are implicitly masked by the shift instruction on x86 and MIPS alike
+		default   : ShouldNotReachHere();
+	}
+}
+
+// the result is stored in FSR, SSR
+// used registers : T2, T3
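+// 64-bit add with 32-bit registers, as a sketch (lo halves in T2/FSR, hi
+// halves in T3/SSR; the carry is recovered with an unsigned compare, and
+// sub works the same way with a borrow):
+//
+//   uint32_t lo    = lo1 + lo2;
+//   uint32_t carry = (lo < lo1);        // sltu
+//   uint32_t hi    = hi1 + hi2 + carry;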
+void TemplateTable::lop2(Operation op) {
+	transition(ltos, ltos);
+	//__ pop2(T2, T3);
+	__ pop_l(T2, T3);
+	switch (op) {
+		case add : 
+			__ addu(FSR, T2, FSR);
+			__ sltu(AT, FSR, T2);
+			__ addu(SSR, T3, SSR);
+			__ addu(SSR, SSR, AT); 
+			break;
+		case sub :
+			__ subu(FSR, T2, FSR);
+			__ sltu(AT, T2, FSR);
+			__ subu(SSR, T3, SSR);
+			__ subu(SSR, SSR, AT);
+			break;
+		case _and: 
+			__ andr(FSR, T2, FSR); 
+			__ andr(SSR, T3, SSR); 
+			break;
+		case _or : 
+			__ orr(FSR, T2, FSR); 
+			__ orr(SSR, T3, SSR); 
+			break;
+		case _xor: 
+			__ xorr(FSR, T2, FSR); 
+			__ xorr(SSR, T3, SSR); 
+			break;
+		default : ShouldNotReachHere();
+	}
+}
+
+// Java requires that this bytecode handle 0x80000000 / -1 without raising an
+// overflow exception; the result must be 0x80000000.
+// The Godson-2 CPU behaves the same way, so no special handling is needed here, unlike x86.
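+// The required semantics, as a sketch:
+//
+//   if (divisor == 0) throw ArithmeticException;               // checked below
+//   if (dividend == (int)0x80000000 && divisor == -1)
+//     result = (int)0x80000000;                                // no trap, no exception
+//   else
+//     result = dividend / divisor;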
+void TemplateTable::idiv() {
+	transition(itos, itos);
+	Label not_zero;
+	//__ pop(SSR);
+	__ pop_i(SSR);
+	__ div(SSR, FSR);
+
+	__ bne(FSR, ZERO, not_zero);
+	__ delayed()->nop();
+	//__ brk(7);
+	__ jmp(Interpreter::_throw_ArithmeticException_entry); 
+	__ delayed()->nop();
+
+	__ bind(not_zero);
+	__ mflo(FSR);
+}
+
+void TemplateTable::irem() {
+	transition(itos, itos);
+	Label not_zero;
+	//__ pop(SSR);
+	__ pop_i(SSR);
+	__ div(SSR, FSR);
+
+	__ bne(FSR, ZERO, not_zero);
+	__ delayed()->nop();
+	//__ brk(7);
+	__ jmp(Interpreter::_throw_ArithmeticException_entry);
+	__ delayed()->nop();
+
+	__ bind(not_zero);
+	__ mfhi(FSR);
+}
+
+// the multiplier is in SSR||FSR, the multiplicand is on the stack
+// the result in SSR||FSR
+// used registers : T2, T3
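+// 64x64->64 multiply from 32-bit halves, as a sketch (a = hi_a:lo_a from the
+// stack, b = hi_b:lo_b in SSR:FSR; the hi_a*hi_b term only affects bits >= 64
+// and is dropped):
+//
+//   uint64_t p  = (uint64_t)lo_a * lo_b;                       // multu lo_a, lo_b
+//   uint32_t hi = (uint32_t)(p >> 32) + lo_a*hi_b + hi_a*lo_b; // low words of cross terms
+//   result      = ((uint64_t)hi << 32) | (uint32_t)p;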
+void TemplateTable::lmul() {
+	transition(ltos, ltos);
+	Label zero, quick, done;
+
+	//__ lw(T2, SP, 0);
+	//__ lw(T3, SP, 4);
+	__ pop_l(T2, T3);
+	__ orr(AT, T2, FSR);
+	__ beq(AT, ZERO, zero);
+	//__ delayed()->addi(SP, SP, 2 * wordSize);
+	__ delayed()->nop();
+
+	__ orr(AT, T3, SSR);
+	__ beq(AT, ZERO, quick);
+	__ delayed()->nop();
+
+	__ multu(T2, SSR);
+	__ nop();
+	__ nop();
+	__ mflo(SSR);
+
+	__ multu(T3, FSR);
+	__ nop();
+	__ nop();
+	__ mflo(T3);
+
+	__ bind(quick);
+	__ multu(T2, FSR);
+	__ addu(SSR, SSR, T3);
+	__ nop();
+	__ mflo(FSR);
+	__ mfhi(T2);
+	__ b(done);
+	__ delayed()->addu(SSR, SSR, T2);
+
+	__ bind(zero);
+	__ move(SSR, ZERO);
+	__ bind(done);
+}
+
+// NOTE: a zero divisor jumps to Interpreter::_throw_ArithmeticException_entry
+void TemplateTable::ldiv() {
+	transition(ltos, ltos);
+	Label normal;
+
+	__ orr(AT, FSR, SSR);
+	__ bne(AT, ZERO, normal);
+	__ delayed()->nop();
+
+	//__ brk(7);		//generate FPE
+	__ jmp(Interpreter::_throw_ArithmeticException_entry);
+	__ delayed()->nop();
+
+	__ bind(normal);
+	__ move(A0, FSR);
+	__ move(A1, SSR);
+	//__ lw(A2, SP, 0);
+	//__ lw(A3, SP, 4);
+	//__ addi(SP, SP, 2 * wordSize);
+	__ pop_l (A2, A3); 
+	__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), 4);
+}
+
+// NOTE: a zero divisor jumps to Interpreter::_throw_ArithmeticException_entry
+void TemplateTable::lrem() {
+	transition(ltos, ltos);
+	Label normal;
+
+	__ orr(AT, FSR, SSR);
+	__ bne(AT, ZERO, normal);
+	__ delayed()->nop();
+
+	__ jmp(Interpreter::_throw_ArithmeticException_entry);
+	__ delayed()->nop();
+
+	__ bind(normal);
+	__ move(A0, FSR);
+	__ move(A1, SSR);
+	__ pop_l (A2, A3); 
+	__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), 4);
+}
+
+// result in SSR||FSR
+// used registers : T2, T3
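+// 64-bit shift left by s = shift & 0x3f with 32-bit halves, as a sketch
+// (value lo:hi in T2:T3, shift amount in FSR):
+//
+//   if (s == 0)      { /* value passes through unchanged */ }
+//   else if (s < 32) { hi = (hi << s) | (lo >> (32 - s)); lo <<= s; }
+//   else             { hi = lo << (s - 32);               lo = 0;   }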
+void TemplateTable::lshl() {
+	transition(itos, ltos);
+	Label normal, done, notZero;
+	__ pop_l(T2, T3);	
+	__ andi(FSR, FSR, 0x3f);				// the bit to be shifted
+
+	__ bne(FSR, ZERO, notZero);
+	__ delayed()-> nop();
+
+	__ move(FSR, T2);
+	__ b(done);
+	__ delayed(); __ move(SSR, T3);
+
+	__ bind(notZero);
+	__ sltiu(AT, FSR, BitsPerWord);
+	__ bne(AT, ZERO, normal);				// <BitsPerWord?
+	__ delayed()->nop();
+
+	__ addi(AT, FSR, - BitsPerWord);
+	__ sllv(SSR, T2, AT);
+	__ b(done);
+	__ delayed(); __ move(FSR, ZERO);
+
+	__ bind(normal);
+	__ sllv(SSR, T3, FSR);
+	__ move(AT, BitsPerWord);
+	__ sub(AT, AT, FSR);
+	__ srlv(AT, T2, AT);
+	__ orr(SSR, SSR, AT);
+
+	__ sllv(FSR, T2, FSR);
+
+	__ bind(done);
+}
+
+// used registers : T2, T3
+void TemplateTable::lshr() {
+  transition(itos, ltos);
+	Label normal, done, notZero;
+	__ pop_l(T2, T3);
+	__ andi(FSR, FSR, 0x3f);	// the bit to be shifted
+
+	__ bne(FSR, ZERO, notZero);
+	__ delayed()-> nop();
+
+	__ move(FSR, T2);	      // zero shift must be handled specially
+	__ b(done);
+	__ delayed(); __ move(SSR, T3);
+
+	__ bind(notZero);
+	__ sltiu(AT, FSR, BitsPerWord);
+	__ bne(AT, ZERO, normal);	// shift < BitsPerWord?
+	__ delayed()->nop();
+
+	__ addi(AT, FSR, -BitsPerWord);	// quick
+	__ srav(FSR, T3, AT);
+	__ b(done);
+	__ delayed()->sra(SSR, T3, BitsPerWord-1);
+
+	__ bind(normal);
+	__ srav(SSR, T3, FSR);		// normal
+	__ move(AT, BitsPerWord);
+	__ sub(AT, AT, FSR);
+	__ srlv(FSR, T2, FSR);
+	__ sllv(AT, T3, AT);
+	__ orr(FSR, FSR, AT);
+	
+	__ bind(done);
+}
+
+// used registers : T2, T3
+void TemplateTable::lushr() {
+	transition(itos, ltos);
+	Label normal, done, notZero;
+	__ pop_l(T2, T3);	
+	__ andi(FSR, FSR, 0x3f);	// the bit to be shifted
+
+	__ bne(FSR, ZERO, notZero);
+	__ delayed()->nop();
+
+	__ move(FSR, T2);		// zero shift must be handled specially
+	__ b(done);
+	__ delayed(); __ move(SSR, T3);
+
+	__ bind(notZero);
+	__ sltiu(AT, FSR, BitsPerWord);
+	__ bne(AT, ZERO, normal);	// shift < BitsPerWord?
+	__ delayed()->nop();
+
+	__ addi(AT, FSR, - BitsPerWord);	// quick
+	__ srlv(FSR, T3, AT);
+	__ b(done);
+	__ delayed(); __ move(SSR, ZERO);
+
+	__ bind(normal);		// normal
+	__ srlv(SSR, T3, FSR);
+	__ move(AT, BitsPerWord);
+	__ sub(AT, AT, FSR);
+	__ srlv(FSR, T2, FSR);
+	__ sllv(AT, T3, AT);
+	__ orr(FSR, FSR, AT);
+
+	__ bind(done);
+}
+
+// result in FSF
+void TemplateTable::fop2(Operation op) {
+	transition(ftos, ftos);
+	__ pop_ftos_to_esp();  // pop ftos into esp
+	switch (op) {
+		case add:
+			__ lwc1(FTF, at_sp());
+			__ add_s(FSF, FTF, FSF);
+			break;
+		case sub: 
+			__ lwc1(FTF, at_sp());
+			__ sub_s(FSF, FTF, FSF);
+			break;
+		case mul: 
+			__ lwc1(FTF, at_sp());
+			__ mul_s(FSF, FTF, FSF);
+			break;
+		case div: 
+			__ lwc1(FTF, at_sp());
+			__ div_s(FSF, FTF, FSF);
+			break;
+		case rem: 
+			__ mfc1(FSR, FSF);
+			__ mtc1(FSR, F12);
+			__ lwc1(FTF, at_sp());
+			__ rem_s(FSF, FTF, F12);
+			break;
+		default : ShouldNotReachHere();
+	}
+
+	__ addi(SP, SP, 1 * wordSize);
+}
+
+// result in SSF||FSF
+// strictfp flags are not handled here
+void TemplateTable::dop2(Operation op) {
+	transition(dtos, dtos);
+	__ pop_dtos_to_esp();  // pop dtos into esp
+	switch (op) {
+		case add: 
+			__ lwc1(FTF, at_sp());
+			__ lwc1(STF, at_sp_p1());
+			__ add_d(FSF, FTF, FSF);
+			break;
+		case sub: 
+			__ lwc1(FTF, at_sp());
+			__ lwc1(STF, at_sp_p1());
+			__ sub_d(FSF, FTF, FSF);
+			break;
+		case mul: 
+			__ lwc1(FTF, at_sp());
+			__ lwc1(STF, at_sp_p1());
+			__ mul_d(FSF, FTF, FSF);
+			break;
+		case div:
+			__ lwc1(FTF, at_sp());
+			__ lwc1(STF, at_sp_p1());
+			__ div_d(FSF, FTF, FSF);
+			break;
+		case rem:
+			__ mfc1(FSR, FSF);
+			__ mfc1(SSR, SSF);
+			__ mtc1(FSR, F12);
+			__ mtc1(SSR, F13);
+			__ lwc1(FTF, at_sp());
+			__ lwc1(STF, at_sp_p1());
+			__ rem_d(FSF, FTF, F12);
+			break;
+		default : ShouldNotReachHere();
+	}
+
+	__ addi(SP, SP, 2 * wordSize);
+}
+
+void TemplateTable::ineg() {
+	transition(itos, itos);
+	__ neg(FSR);
+}
+
+void TemplateTable::lneg() {
+	transition(ltos, ltos);
+	__ nor(FSR, ZERO, FSR);
+	__ addiu(FSR, FSR, 1);
+	__ sltiu(AT, FSR, 1);
+	__ nor(SSR, ZERO, SSR);
+	__ addu(SSR, SSR, AT);
+}
+/*
+// Note: 'double' and 'long long' have 32-bits alignment on x86.
+static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
+  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
+  // of 128-bits operands for SSE instructions.
+  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
+  // Store the value to a 128-bits operand.
+  operand[0] = lo;
+  operand[1] = hi;
+  return operand;
+}
+
+// Buffer for 128-bits masks used by SSE instructions.
+static jlong float_signflip_pool[2*2];
+static jlong double_signflip_pool[2*2];
+*/
+void TemplateTable::fneg() {
+	transition(ftos, ftos);
+	__ neg_s(FSF, FSF);
+}
+
+void TemplateTable::dneg() {
+	transition(dtos, dtos);
+	__ neg_d(FSF, FSF);
+}
+
+// used registers : T2
+void TemplateTable::iinc() {
+	transition(vtos, vtos);
+	locals_index(T2);
+	__ lw(FSR, T2, 0);
+	__ lb(AT, at_bcp(2));           // get constant
+	__ addu(FSR, FSR, AT);
+	__ sw(FSR, T2, 0);
+}
+
+// used register : T2
+void TemplateTable::wide_iinc() {
+	transition(vtos, vtos);
+	locals_index_wide(T2);
+	__ load_two_bytes_from_at_bcp(FSR, AT, 4);
+	__ hswap(FSR);
+	__ lw(AT, T2, 0);
+	__ addu(FSR, AT, FSR);
+	__ sw(FSR, T2, 0);
+}
+
+void TemplateTable::convert() {
+	// Checking
+#ifdef ASSERT
+	{ TosState tos_in  = ilgl;
+		TosState tos_out = ilgl;
+		switch (bytecode()) {
+			case Bytecodes::_i2l: // fall through
+			case Bytecodes::_i2f: // fall through
+			case Bytecodes::_i2d: // fall through
+			case Bytecodes::_i2b: // fall through
+			case Bytecodes::_i2c: // fall through
+			case Bytecodes::_i2s: tos_in = itos; break;
+			case Bytecodes::_l2i: // fall through
+			case Bytecodes::_l2f: // fall through
+			case Bytecodes::_l2d: tos_in = ltos; break;
+			case Bytecodes::_f2i: // fall through
+			case Bytecodes::_f2l: // fall through
+			case Bytecodes::_f2d: tos_in = ftos; break;
+			case Bytecodes::_d2i: // fall through
+			case Bytecodes::_d2l: // fall through
+			case Bytecodes::_d2f: tos_in = dtos; break;
+			default             : ShouldNotReachHere();
+		}
+		switch (bytecode()) {
+			case Bytecodes::_l2i: // fall through
+			case Bytecodes::_f2i: // fall through
+			case Bytecodes::_d2i: // fall through
+			case Bytecodes::_i2b: // fall through
+			case Bytecodes::_i2c: // fall through
+			case Bytecodes::_i2s: tos_out = itos; break;
+			case Bytecodes::_i2l: // fall through
+			case Bytecodes::_f2l: // fall through
+			case Bytecodes::_d2l: tos_out = ltos; break;
+			case Bytecodes::_i2f: // fall through
+			case Bytecodes::_l2f: // fall through
+			case Bytecodes::_d2f: tos_out = ftos; break;
+			case Bytecodes::_i2d: // fall through
+			case Bytecodes::_l2d: // fall through
+			case Bytecodes::_f2d: tos_out = dtos; break;
+			default             : ShouldNotReachHere();
+		}
+		transition(tos_in, tos_out);
+	}
+#endif // ASSERT
+
+	// Conversion
+	// (Note: use pushl(ecx)/popl(ecx) for 1/2-word stack-ptr manipulation)
+	switch (bytecode()) {
+		case Bytecodes::_i2l:
+			__ extend_sign(SSR, FSR);
+			break;
+		case Bytecodes::_i2f:
+			__ mtc1(FSR, FSF);
+			__ cvt_s_w(FSF, FSF);
+			break;
+		case Bytecodes::_i2d:
+			__ mtc1(FSR, FSF);
+			__ cvt_d_w(FSF, FSF);
+			break;
+		case Bytecodes::_i2b:
+			__ shl(FSR, 24);
+			__ sar(FSR, 24);
+			break;
+		case Bytecodes::_i2c:
+			__ andi(FSR, FSR, 0xFFFF);  // truncate upper 16 bits
+			break;
+		case Bytecodes::_i2s:
+			__ shl(FSR, 16);
+			__ sar(FSR, 16);
+			break;
+		case Bytecodes::_l2i:
+			/* nothing to do */
+			break;
+		case Bytecodes::_l2f:
+			__ mtc1(FSR, FSF);
+			__ mtc1(SSR, SSF);
+			__ cvt_s_l(FSF, FSF);
+			break;
+		case Bytecodes::_l2d:
+			__ mtc1(FSR, FSF);
+			__ mtc1(SSR, SSF);
+			__ cvt_d_l(FSF, FSF);
+			break;
+		case Bytecodes::_f2i:
+			{
+				Label L;
+				__ c_un_s(FSF, FSF);		//NaN?
+				__ bc1t(L);
+				__ delayed();
+				__ move(FSR, ZERO);
+
+				__ trunc_w_s(FSF, FSF);
+				__ mfc1(FSR, FSF);
+				__ bind(L);
+			}
+			break;
+		case Bytecodes::_f2l:
+			{
+				Label L;
+				__ move(SSR, ZERO);
+				__ c_un_s(FSF, FSF);		//NaN?
+				__ bc1t(L);
+				__ delayed();
+				__ move(FSR, ZERO);
+
+				__ trunc_l_s(FSF, FSF);
+				__ mfc1(FSR, FSF);
+				__ mfc1(SSR, SSF);
+				__ bind(L);
+			}
+			break;
+		case Bytecodes::_f2d:
+			__ cvt_d_s(FSF, FSF);
+			break;
+		case Bytecodes::_d2i:
+			{
+				Label L;
+				__ c_un_d(FSF, FSF);		//NaN?
+				__ bc1t(L);
+				__ delayed();
+				__ move(FSR, ZERO);
+
+				__ trunc_w_d(FSF, FSF);
+				__ mfc1(FSR, FSF);
+				__ bind(L);
+			}
+			break;
+		case Bytecodes::_d2l:
+			{
+				Label L;
+				__ move(SSR, ZERO);
+				__ c_un_d(FSF, FSF);		//NaN?
+				__ bc1t(L);
+				__ delayed();
+				__ move(FSR, ZERO);
+
+				__ trunc_l_d(FSF, FSF);
+				__ mfc1(FSR, FSF);
+				__ mfc1(SSR, SSF);
+				__ bind(L);
+			}
+			break;
+		case Bytecodes::_d2f:
+			__ cvt_s_d(FSF, FSF);
+			break;
+		default             :
+			ShouldNotReachHere();
+	}
+}
+
+void TemplateTable::lcmp() {
+	transition(ltos, itos);
+
+	Label low, high, done;
+	__ lw(T3, SP, 4);
+//	__ pop_l(T2, T3);
+	__ slt(AT, T3, SSR);
+	__ bne(AT, ZERO, low);
+	__ delayed()->addi(SP, SP, 8);
+//	__ delayed()->nop();
+
+	__ slt(AT, SSR, T3);
+	__ bne(AT, ZERO, high);
+	__ delayed()->nop();
+
+	__ lw(T2, SP, -8);
+	__ sltu(AT, T2, FSR);
+	__ bne(AT, ZERO, low);
+	__ delayed();
+
+	__ sltu(AT, FSR, T2);
+	__ bne(AT, ZERO, high);
+	__ delayed()->nop();
+
+	__ b(done);
+	__ delayed(); __ move(FSR, 0);
+
+	__ bind(low);
+	__ b(done);
+	__ delayed(); __ move(FSR, -1);
+
+	__ bind(high);
+	__ b(done);
+	__ delayed(); __ move(FSR, 1);
+
+	__ bind(done);
+}
+
+void TemplateTable::float_cmp(bool is_float, int unordered_result) {
+	Label less, done;
+
+	__ move(FSR, ZERO);
+
+	if (is_float) {
+		__ pop_ftos_to_esp();
+		__ lwc1(FTF, at_sp());
+		__ c_eq_s(FTF, FSF);
+		__ bc1t(done);
+		__ delayed()->addi(SP, SP, 1 * wordSize);
+
+		if (unordered_result<0)
+			__ c_ult_s(FTF, FSF);
+		else
+			__ c_olt_s(FTF, FSF);
+	} else {
+		__ pop_dtos_to_esp();
+		__ lwc1(FTF, at_sp());
+		__ lwc1(STF, at_sp_p1());
+		__ c_eq_d(FTF, FSF);
+		__ bc1t(done);
+		__ delayed()->addi(SP, SP, 2 * wordSize);
+
+		if (unordered_result<0)
+			__ c_ult_d(FTF, FSF);
+		else
+			__ c_olt_d(FTF, FSF);
+	}
+	__ bc1t(less);
+	__ delayed()->nop();
+	__ b(done);
+	__ delayed(); __ move(FSR, 1);
+	__ bind(less);
+	__ move(FSR, -1);
+	__ bind(done);
+}
+
+
+// used registers : T3, T4, T7
+// FSR : return bci, this is defined by the vm specification
+// T3 : method
+// T4 : offset
+// T7 : next bytecode, this is required by dispatch_base
+void TemplateTable::branch(bool is_jsr, bool is_wide) {
+	__ get_method(T3);
+	__ profile_taken_branch(T4, T7);		// only C2 meaningful 
+
+#ifndef CORE
+	const ByteSize be_offset = methodOopDesc::backedge_counter_offset() 
+		+ InvocationCounter::counter_offset();
+	const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() 
+		+ InvocationCounter::counter_offset();
+	const int method_offset = frame::interpreter_frame_method_offset * wordSize;
+#endif // CORE
+
+	// Load up T4 with the branch displacement
+	if (!is_wide) {
+		__ load_two_bytes_from_at_bcp(T4, AT, 1);
+		__ hswap(T4);
+	} else {
+		__ lw(T4, at_bcp(1));
+		__ swap(T4);
+	}
+
+	// Handle all the JSR stuff here, then exit.
+	// It's much shorter and cleaner than intermingling with the
+	// non-JSR normal-branch stuff occurring below.
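+	// jsr, as a sketch: push the bci of the bytecode following the jsr as the
+	// return address, then take the branch (the matching ret/wide_ret reloads
+	// BCP from that bci):
+	//
+	//   return_bci = bcp + (is_wide ? 5 : 3) - method->code_base();
+	//   push_i(return_bci);
+	//   bcp += displacement;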
+	if (is_jsr) {
+		// Pre-load the next target bytecode into T7
+		__ add(AT, BCP, T4);
+		__ lbu(T7, AT, 0);
+
+		// compute return address as bci in FSR
+		__ addi(FSR, BCP, (is_wide?5:3) - in_bytes(constMethodOopDesc::codes_offset()));
+		__ lw(AT, T3, in_bytes(methodOopDesc::const_offset()));
+		__ sub(FSR, FSR, AT);
+		// Adjust the bcp in BCP by the displacement in T4
+		__ add(BCP, BCP, T4);
+		// jsr returns atos that is not an oop
+		// __ dispatch_only_noverify(atos);
+		// Push return address
+		//   __ push_i(eax);
+		__ push_i(FSR);
+		// jsr returns vtos
+		__ dispatch_only_noverify(vtos);
+
+		return;
+	}
+
+	// Normal (non-jsr) branch handling
+
+	// Adjust the bcp in S0 by the displacement in T4
+	__ add(BCP, BCP, T4);
+
+#ifdef CORE
+	// Pre-load the next target bytecode into EBX
+	__ lbu(T7, BCP, 0);
+	// continue with the bytecode @ target
+	__ dispatch_only(vtos);
+#else
+	assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
+	Label backedge_counter_overflow;
+	Label profile_method;
+	Label dispatch;
+	if (UseLoopCounter) {
+		// increment backedge counter for backward branches
+		// eax: MDO
+		// ebx: MDO bumped taken-count
+		// T3: method
+		// T4: target offset
+		// BCP: target bcp
+		// LVP: locals pointer
+		__ bgtz(T4, dispatch);	// check if forward or backward branch
+		__ delayed()->nop();
+
+		// increment back edge counter 
+		__ lw(T0, T3, in_bytes(be_offset));
+		__ increment(T0, InvocationCounter::count_increment);
+		__ sw(T0, T3, in_bytes(be_offset));
+
+		// load invocation counter
+		__ lw(T1, T3, in_bytes(inv_offset));
+		// buffer bit added, so masking is not needed
+		// by yjl 10/24/2005
+		//__ move(AT, InvocationCounter::count_mask_value);
+		//__ andr(T1, T1, AT);
+
+		// add backedge counter & invocation counter
+		__ add(T1, T1, T0);
+
+		if (ProfileInterpreter) {
+			// Test to see if we should create a method data oop
+			__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterProfileLimit)));
+			__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterProfileLimit)));
+			__ slt(AT, T1, AT);
+			__ bne(AT, ZERO, dispatch);
+			__ delayed()->nop();
+
+			// if no method data exists, go to profile method
+			__ test_method_data_pointer(T1, profile_method);
+
+			if (UseOnStackReplacement) {
+				// check for overflow against T7, which is the MDO taken count
+				__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
+				__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
+				// the value in T7 was loaded by profile_taken_branch at the beginning
+				__ slt(AT, T7, AT);
+				__ bne(AT, ZERO, dispatch);
+				__ delayed()->nop();
+
+				// When ProfileInterpreter is on, the backedge_count comes 
+				// from the methodDataOop, which value does not get reset on 
+				// the call to  frequency_counter_overflow().  
+				// To avoid excessive calls to the overflow routine while 
+				// the method is being compiled, add a second test to make 
+				// sure the overflow function is called only once every 
+				// overflow_frequency.
+				const int overflow_frequency = 1024;
+				__ andi(T7, T7, overflow_frequency-1);
+				__ beq(T7, ZERO, backedge_counter_overflow);
+				__ delayed()->nop();
+			}
+		} else {
+			if (UseOnStackReplacement) {
+				// check for overflow against T1, which is the sum of the counters
+				__ lui(AT, Assembler::split_high(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
+				__ lw(AT, AT, Assembler::split_low(int(&InvocationCounter::InterpreterBackwardBranchLimit)));
+				__ slt(AT, T1, AT);
+				__ beq(AT, ZERO, backedge_counter_overflow);
+				__ delayed()->nop();
+			}
+		}
+		__ bind(dispatch);
+	}
+
+	// Pre-load the next target bytecode into T7
+	__ lbu(T7, BCP, 0);
+
+	// continue with the bytecode @ target
+	// FSR: return bci for jsr's, unused otherwise
+	// T7: target bytecode
+	// BCP: target bcp
+	__ dispatch_only(vtos);
+
+	if (UseLoopCounter) {
+		if (ProfileInterpreter) {
+			// Out-of-line code to allocate method data oop.
+			__ bind(profile_method);
+			__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method), BCP);
+			__ lbu(T2, BCP, 0);
+			__ lw(T3, FP, method_offset);
+			__ lw(T3, T3, in_bytes(methodOopDesc::method_data_offset()));
+			__ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
+			__ test_method_data_pointer(T3, dispatch);
+			// offset non-null mdp by MDO::data_offset() + IR::profile_method()
+			__ addi(T3, T3, in_bytes(methodDataOopDesc::data_offset()));
+			__ add(T3, T3, T1);
+			__ sw(T3, FP, frame::interpreter_frame_mdx_offset * wordSize);
+			__ b(dispatch);
+			__ delayed()->nop();
+		}
+
+		if (UseOnStackReplacement) {
+			// invocation counter overflow
+			__ bind(backedge_counter_overflow);
+			__ sub(T4, BCP, T4);	// branch bcp
+			call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+						InterpreterRuntime::frequency_counter_overflow), T4);
+			__ lbu(T7, BCP, 0);
+
+			// V0: osr nmethod (osr ok) or NULL (osr not possible)
+			// V1: osr adapter frame return address
+			// T7: target bytecode
+			// LVP: locals pointer
+			// BCP: bcp
+			__ beq(V0, ZERO, dispatch);
+			__ delayed()->nop();
+			// nmethod may have been invalidated (VM may block upon call_VM return)
+			__ lw(T3, V0, nmethod::entry_bci_offset());
+			__ move(AT, InvalidOSREntryBci);
+			__ beq(AT, T3, dispatch);
+			__ delayed()->nop();
+			// We need to prepare to execute the OSR method. First we must
+			// migrate the locals and monitors off of the stack.
+			//eax V0: osr nmethod (osr ok) or NULL (osr not possible)
+			//ebx V1: osr adapter frame return address
+			//edx  T7: target bytecode
+			//edi  LVP: locals pointer
+			//esi  BCP: bcp
+			//__ movl(esi, eax);          // save the nmethod
+			__ move(BCP, V0); 
+			// const Register thread = ecx;
+			const Register thread = T8;
+			__ get_thread(thread);
+			call_VM(noreg, CAST_FROM_FN_PTR(address, 
+						SharedRuntime::OSR_migration_begin));
+			// eax is OSR buffer, move it to expected parameter location
+			//refer to osrBufferPointer in c1_LIRAssembler_mips.cpp	
+			// __ movl(ecx, eax);
+			__ move(T0, V0);
+
+			// pop the interpreter frame
+			//  __ movl(edx, Address(ebp, frame::interpreter_frame_sender_sp_offset 
+			//  * wordSize)); // get sender sp
+			__ lw(T8, Address(FP, 
+				frame::interpreter_frame_sender_sp_offset * wordSize)); 
+		//FIXME, shall we keep the return address on the stack?	
+			__ leave();                                // remove frame anchor
+			// __ popl(edi);                         // get return address
+			//__ addi(SP, SP, wordSize);               // get return address
+		       //   __ pop(LVP);	
+			__ move(LVP, RA);	
+		       // __ movl(esp, edx);                         // set sp to sender sp
+			__ move(SP, T8 );
+
+			Label skip;
+			Label chkint;
+
+			// The interpreter frame we have removed may be returning to
+			// either the callstub or the interpreter. Since we will
+			// now be returning from a compiled (OSR) nmethod we must
+			// adjust the return address to one where it can handle compiled
+			// results and clean the fpu stack. This is very similar to
+			// what an i2c adapter must do.
+
+			// Are we returning to the call stub?
+#if 0	
+			// __ cmpl(edi, (int)StubRoutines::_call_stub_return_address);
+			__ addi(AT, LVP, -(int)StubRoutines::_call_stub_return_address); 
+			//  __ jcc(Assembler::notEqual, chkint);
+			__ bne(AT, ZERO, chkint);
+			__ delayed()->nop();      
+			// yes adjust to the specialized call stub  return.
+			// assert(StubRoutines::i486::get_call_stub_compiled_return() != NULL,
+			// "must be set");
+			assert(StubRoutines::gs2::get_call_stub_compiled_return() != NULL, 
+					"must be set");
+			// __ movl(edi, (intptr_t) StubRoutines::i486::get_call_stub_compiled_return());
+			__ move(LVP, (intptr_t) StubRoutines::gs2::get_call_stub_compiled_return()); 
+			//  __ jmp(skip);
+			__ b(skip);
+			__ delayed()->nop();
+			__ bind(chkint);
+
+			// Are we returning to the interpreter? Look for sentinel
+
+			//__ cmpl(Address(edi, -8), Interpreter::return_sentinel);
+			__ lw(AT, LVP , -8); 
+			__ addi(AT, AT, -Interpreter::return_sentinel); 
+			//__ jcc(Assembler::notEqual, skip);
+			__ bne(AT, ZERO, skip);
+			__ delayed()->nop(); 
+			// Adjust to compiled return back to interpreter
+
+			// __ movl(edi, Address(edi, -4));
+			__ lw(LVP, LVP, -4); 
+
+			__ bind(skip);
+#endif
+			// Align stack pointer for compiled code (note that caller is
+			// responsible for undoing this fixup by remembering the old SP
+			// in an ebp-relative location)
+			//  __ andl(esp, -(StackAlignmentInBytes));
+			__ move(AT, -(StackAlignmentInBytes));
+			__ andr(SP, SP, AT);
+			// push the (possibly adjusted) return address
+			//  __ pushl(edi);
+			//__ push(LVP);
+//			__ move(RA, LVP);	
+			// and begin the OSR nmethod
+			//  __ jmp(Address(esi, nmethod::osr_entry_point_offset()));
+		//refer to osr_entry in c1_LIRAssembler_mips.cpp	
+			__ lw(AT, BCP, nmethod::osr_entry_point_offset()); 
+			__ jr(AT); 
+			__ delayed()->nop(); 
+		}
+	}
+#endif // not CORE
+}
+
+void TemplateTable::if_0cmp(Condition cc) {
+	transition(itos, vtos);
+	// assume branch is more often taken than not (loops use backward branches)
+	Label not_taken;
+	switch(cc) {
+		case not_equal:
+			__ beq(FSR, ZERO, not_taken);
+			break;
+		case equal:
+			__ bne(FSR, ZERO, not_taken);
+			break;
+		case less:
+			__ bgez(FSR, not_taken);
+			break;
+		case less_equal:
+			__ bgtz(FSR, not_taken);
+			break;
+		case greater:
+			__ blez(FSR, not_taken);
+			break;
+		case greater_equal:
+			__ bltz(FSR, not_taken);
+			break;
+	}
+	__ delayed()->nop();
+
+	branch(false, false);
+
+	__ bind(not_taken);
+	__ profile_not_taken_branch(FSR);
+}
+
+
+void TemplateTable::if_icmp(Condition cc) {
+	transition(itos, vtos);
+	// assume branch is more often taken than not (loops use backward branches)
+	Label not_taken;
+	//__ lw(SSR, SP, 0);
+	
+	__ pop_i(SSR);	
+	switch(cc) {
+		case not_equal:
+			__ beq(SSR, FSR, not_taken);
+			break;
+		case equal:
+			__ bne(SSR, FSR, not_taken);
+			break;
+		case less:
+			__ slt(AT, SSR, FSR);
+			__ beq(AT, ZERO, not_taken);
+			break;
+		case less_equal:
+			__ slt(AT, FSR, SSR);
+			__ bne(AT, ZERO, not_taken);
+			break;
+		case greater:
+			__ slt(AT, FSR, SSR);
+			__ beq(AT, ZERO, not_taken);
+			break;
+		case greater_equal:
+			__ slt(AT, SSR, FSR);
+			__ bne(AT, ZERO, not_taken);
+			break;
+	}
+	//	__ delayed()->addi(SP, SP, 1 * wordSize);
+	__ delayed()->nop();
+
+	branch(false, false);
+
+	__ bind(not_taken);
+	__ profile_not_taken_branch(FSR);
+}
+
+
+void TemplateTable::if_nullcmp(Condition cc) {
+	transition(atos, vtos);
+	// assume branch is more often taken than not (loops use backward branches)
+	Label not_taken;
+	switch(cc) {
+		case not_equal:
+			__ beq(FSR, ZERO, not_taken);
+			break;
+		case equal:
+			__ bne(FSR, ZERO, not_taken);
+			break;
+		default:
+			ShouldNotReachHere();
+	}
+	__ delayed()->nop();
+
+	branch(false, false);
+
+	__ bind(not_taken);
+	__ profile_not_taken_branch(FSR);
+}
+
+
+void TemplateTable::if_acmp(Condition cc) {
+	transition(atos, vtos);
+	// assume branch is more often taken than not (loops use backward branches)
+	Label not_taken;
+	//	__ lw(SSR, SP, 0);
+	__ pop_ptr(SSR);
+	switch(cc) {
+		case not_equal:
+			__ beq(SSR, FSR, not_taken);
+			break;
+		case equal:
+			__ bne(SSR, FSR, not_taken);
+			break;
+		default:
+			ShouldNotReachHere();
+	}
+	//	__ delayed()->addi(SP, SP, 4);
+	__ delayed()->nop();
+
+	branch(false, false);
+
+	__ bind(not_taken);
+	__ profile_not_taken_branch(FSR);
+}
+
+// used registers : T1, T2, T3
+// T1 : method
+// T2 : return bci
+void TemplateTable::ret() {
+	transition(vtos, vtos);
+
+	locals_index(T2);
+	__ lw(T2, T2, 0);
+	__ profile_ret(T2, T3);
+
+	__ get_method(T1);
+	__ lw(BCP, T1, in_bytes(methodOopDesc::const_offset()));
+	__ add(BCP, BCP, T2);
+	__ addi(BCP, BCP, in_bytes(constMethodOopDesc::codes_offset()));
+
+	__ dispatch_next(vtos);
+}
+
+// used registers : T1, T2, T3
+// T1 : method
+// T2 : return bci
+void TemplateTable::wide_ret() {
+	transition(vtos, vtos);
+
+	locals_index_wide(T2);
+	__ lw(T2, T2, 0);                   // get return bci, compute return bcp
+	__ profile_ret(T2, T3);
+
+	__ get_method(T1);
+	__ lw(BCP, T1, in_bytes(methodOopDesc::const_offset()));
+	__ add(BCP, BCP, T2);
+	__ addi(BCP, BCP, in_bytes(constMethodOopDesc::codes_offset()));
+
+	__ dispatch_next(vtos);
+}
+
+// used register T2, T3, T4, T7
+// T2 : bytecode pointer
+// T3 : low
+// T4 : high
+// T7 : dest bytecode, required by dispatch_base
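+//
+// For reference (a sketch of the JVM-spec layout that the offsets below assume):
+//   aligned_bcp + 0*wordSize : default offset
+//   aligned_bcp + 1*wordSize : low
+//   aligned_bcp + 2*wordSize : high
+//   aligned_bcp + 3*wordSize : jump offsets[0 .. high - low]
+// all values are stored big-endian, hence the swap() calls.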
+void TemplateTable::tableswitch() {
+	Label default_case, continue_execution;
+	transition(itos, vtos);
+
+	// align BCP
+	__ addi(T2, BCP, wordSize);
+	__ move(AT, -wordSize);
+	__ andr(T2, T2, AT);
+
+	// load lo & hi
+	__ lw(T3, T2, 1*wordSize);
+	__ swap(T3);
+	__ lw(T4, T2, 2*wordSize);
+	__ swap(T4);
+
+	// check against lo & hi
+	__ slt(AT, FSR, T3);
+	__ bne(AT, ZERO, default_case);
+	__ delayed()->nop();
+
+	__ slt(AT, T4, FSR);
+	__ bne(AT, ZERO, default_case);
+	__ delayed()->nop();
+
+	// lookup dispatch offset into T4 (big-endian)
+	__ sub(FSR, FSR, T3);
+	__ sll(AT, FSR, 2);
+	__ add(AT, T2, AT);
+	__ lw(T4, AT, 3*wordSize);
+	__ profile_switch_case(FSR, T2, T3);
+
+	__ bind(continue_execution);
+	__ swap(T4);
+	__ add(BCP, BCP, T4);
+	__ lbu(T7, BCP, 0);
+	__ dispatch_only(vtos);
+
+	// handle default
+	__ bind(default_case);
+	__ profile_switch_default(FSR);
+	__ lw(T4, T2, 0);
+	__ b(continue_execution);
+	__ delayed()->nop();
+}
+
+void TemplateTable::lookupswitch() {
+	transition(itos, itos);
+	__ stop("lookupswitch bytecode should have been rewritten");
+}
+
+// used registers : T2, T3, T4, T7
+// T2 : bytecode pointer
+// T3 : pair index
+// T4 : offset
+// T7 : dest bytecode
+// the data after the opcode is the same as lookupswitch
+// see Rewriter::rewrite_method for more information
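+//
+// For reference (a sketch of the JVM-spec lookupswitch layout that this code assumes):
+//   aligned_bcp + 0*wordSize : default offset
+//   aligned_bcp + 1*wordSize : npairs
+//   aligned_bcp + 2*wordSize : pairs[0 .. npairs-1], each 8 bytes of (match, offset)
+// which is why the pair index below is scaled by 8 (sll ..., 3).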
+void TemplateTable::fast_linearswitch() {
+	transition(itos, vtos);
+	Label loop_entry, loop, found, continue_execution;  
+
+	// byte-swap FSR so we can avoid swapping the table entries
+	__ swap(FSR);
+
+	// align BCP
+	__ addi(T2, BCP, wordSize);
+	__ move(AT, -wordSize);
+	__ andr(T2, T2, AT);
+
+	// set counter
+	__ lw(T3, T2, wordSize);
+	__ swap(T3);
+	__ b(loop_entry);
+	__ delayed()->nop();
+
+	// table search
+	__ bind(loop);
+	// get the entry value
+	__ sll(AT, T3, 3);
+	__ add(AT, T2, AT);
+	__ lw(AT, AT, 2 * wordSize);
+
+	// found?
+	__ beq(FSR, AT, found);
+	__ delayed()->nop();
+
+	__ bind(loop_entry);
+	__ bgtz(T3, loop);
+	__ delayed()->addiu(T3, T3, -1);
+
+	// default case
+	__ profile_switch_default(FSR);
+	__ lw(T4, T2, 0);
+	__ b(continue_execution);
+	__ delayed()->nop();
+
+	// entry found -> get offset
+	__ bind(found);
+	__ sll(AT, T3, 3);
+	__ add(AT, T2, AT);
+	__ lw(T4, AT, 3 * wordSize);
+	__ profile_switch_case(T3, FSR, T2);
+
+	// continue execution
+	__ bind(continue_execution);  
+	__ swap(T4);
+	__ add(BCP, BCP, T4);
+	__ lbu(T7, BCP, 0);
+	__ dispatch_only(vtos);
+}
+
+// used registers : T0, T1, T2, T3, T4, T7
+// T2 : pairs address(array)
+// T7 : dest bytecode
+// the data after the opcode is the same as lookupswitch
+// see Rewriter::rewrite_method for more information
+void TemplateTable::fast_binaryswitch() {
+	transition(itos, vtos);
+	// Implementation using the following core algorithm:
+	//
+	// int binary_search(int key, LookupswitchPair* array, int n) {
+	//   // Binary search according to "Methodik des Programmierens" by
+	//   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
+	//   int i = 0;
+	//   int j = n;
+	//   while (i+1 < j) {
+	//     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
+	//     // with      Q: for all i: 0 <= i < n: key < a[i]
+	//     // where a stands for the array and assuming that the (non-existing)
+	//     // element a[n] is infinitely big.
+	//     int h = (i + j) >> 1;
+	//     // i < h < j
+	//     if (key < array[h].fast_match()) {
+	//       j = h;
+	//     } else {
+	//       i = h;
+	//     }
+	//   }
+	//   // R: a[i] <= key < a[i+1] or Q
+	//   // (i.e., if key is within array, i is the correct index)
+	//   return i;
+	// }
+
+	// register allocation
+	const Register array = T2;
+	const Register i=T3, j=T4;
+	const Register h=T1;
+	const Register temp=T0;
+	const Register key=FSR;
+
+	// setup array
+	__ addi(array, BCP, 3*wordSize);
+	__ move(AT, -wordSize);
+	__ andr(array, array, AT);
+
+	// initialize i & j
+	__ move(i, ZERO);
+	__ lw(j, array, - 1 * wordSize);
+	// Convert j into native byte ordering
+	__ swap(j);
+
+	// and start
+	Label entry;
+	__ b(entry);
+	__ delayed()->nop();
+
+	// binary search loop
+	{ 
+		Label loop;
+		__ bind(loop);
+		// int h = (i + j) >> 1;
+		__ add(h, i, j);
+		__ shr(h, 1);
+		// if (key < array[h].fast_match()) {
+		//   j = h;
+		// } else {
+		//   i = h;
+		// }
+		// Convert array[h].match to native byte-ordering before compare
+		__ sll(AT, h, 3);
+		__ add(AT, array, AT);
+		__ lw(temp, AT, 0*wordSize);
+		__ swap(temp);
+		
+		{
+			Label set_i, end_of_if;
+			__ slt(AT, key, temp);
+			__ beq(AT, ZERO, set_i);
+			__ delayed()->nop(); 
+
+			__ b(end_of_if);
+			__ delayed(); __ move(j, h);
+
+			__ bind(set_i);
+			__ move(i, h);
+
+			__ bind(end_of_if);
+		}
+		// while (i+1 < j)
+		__ bind(entry);
+		__ addi(h, i, 1);
+		__ slt(AT, h, j);
+		__ bne(AT, ZERO, loop);
+		__ delayed()->nop();
+	}
+
+	// end of binary search, result index is i (must check again!)
+	Label default_case;
+	// Convert array[i].match to native byte-ordering before compare
+	__ sll(AT, i, 3);
+	__ add(AT, array, AT);
+	__ lw(temp, AT, 0 * wordSize);
+	__ swap(temp);
+	__ bne(key, temp, default_case);
+	__ delayed()->nop();
+
+	// entry found -> j = offset
+	__ sll(AT, i, 3);
+	__ add(AT, array, AT);
+	__ lw(j, AT, 1 * wordSize);
+	__ profile_switch_case(i, key, array);
+	__ swap(j);
+
+	__ add(BCP, BCP, j);
+	__ lbu(T7, BCP, 0);
+	__ dispatch_only(vtos);
+
+	// default case -> j = default offset
+	__ bind(default_case);
+	__ profile_switch_default(i);
+	__ lw(j, array, - 2 * wordSize);
+	__ swap(j);
+	__ add(BCP, BCP, j);
+	__ lbu(T7, BCP, 0);
+	__ dispatch_only(vtos);
+}
+
+void TemplateTable::_return(TosState state) {
+	transition(state, state);
+	assert(_desc->calls_vm(), "inconsistent calls_vm information"); // call in remove_activation
+	if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
+		assert(state == vtos, "only valid state");
+		//__ movl(eax, aaddress(0));
+		__ lw(T4, aaddress(0));
+		//__ movl(edi, Address(eax, oopDesc::klass_offset_in_bytes()));
+		__ lw(LVP, T4, oopDesc::klass_offset_in_bytes());
+		//__ movl(edi, Address(edi, Klass::access_flags_offset_in_bytes() 
+		//+ sizeof(oopDesc)));
+		__ lw(LVP, LVP, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+		//__ testl(edi, JVM_ACC_HAS_FINALIZER);
+		__ move(AT, JVM_ACC_HAS_FINALIZER); 
+		__ andr(AT, AT, LVP);//by_css
+		//__ andi(AT, LVP, JVM_ACC_HAS_FINALIZER); 
+		Label skip_register_finalizer;
+		//__ jcc(Assembler::zero, skip_register_finalizer);
+		__ beq(AT, ZERO, skip_register_finalizer);
+		__ delayed()->nop(); 
+		//__ call_VM(noreg, CAST_FROM_FN_PTR(address, 
+		//InterpreterRuntime::register_finalizer), eax);
+		__ call_VM(noreg, CAST_FROM_FN_PTR(address, 
+					InterpreterRuntime::register_finalizer), T4);
+		__ bind(skip_register_finalizer);
+	}
+	__ remove_activation(state, T9);
+
+	__ jr(T9);
+	__ delayed()->nop();
+}
+
+// ----------------------------------------------------------------------------
+// Volatile variables demand their effects be made known to all CPU's
+// in order.  Store buffers on most chips allow reads & writes to
+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
+// without some kind of memory barrier (i.e., it's not sufficient that
+// the interpreter does not reorder volatile references, the hardware
+// also must not reorder them).
+//
+// According to the new Java Memory Model (JMM):
+// (1) All volatiles are serialized with respect to each other.  ALSO reads &
+//     writes act as acquire & release, so:
+// (2) A read cannot let unrelated NON-volatile memory refs that
+//     happen after the read float up to before the read.  It's OK for
+//     non-volatile memory refs that happen before the volatile read to
+//     float down below it.
+// (3) Similarly, a volatile write cannot let unrelated NON-volatile
+//     memory refs that happen BEFORE the write float down to after the
+//     write.  It's OK for non-volatile memory refs that happen after the
+//     volatile write to float up before it.
+//
+// We only put in barriers around volatile refs (they are expensive),
+// not _between_ memory refs (that would require us to track the
+// flavor of the previous memory refs).  Requirements (2) and (3)
+// require some barriers before volatile stores and after volatile
+// loads.  These nearly cover requirement (1) but miss the
+// volatile-store-volatile-load case.  This final case is placed after
+// volatile-stores although it could just as well go before
+// volatile-loads.
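+//
+// A minimal sketch (not taken from this file) of the placement those rules
+// imply, with sync() standing in for the barrier on MIPS:
+//
+//   sync();               // before a volatile store           (rule 3)
+//   <volatile store>
+//   sync();               // after the store, also covers volatile-store-volatile-load
+//   <volatile load>
+//   sync();               // after a volatile load             (rule 2)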
+//void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
+//                                     order_constraint) {
+void TemplateTable::volatile_barrier( ) {
+  // Helper function to insert a is-volatile test and memory barrier
+  //if (os::is_MP()) { // Not needed on single CPU
+  //  __ membar(order_constraint);
+  //}
+	if( !os::is_MP() ) return;	// Not needed on single CPU
+	__ sync();
+}
+
+// we do not shift the index left by 2 bits in get_cache_and_index_at_bcp,
+// because we always need to shift it before using it. The ConstantPoolCacheEntry
+// is 16 bytes long and index is the index into constantPoolCacheOopDesc, so
+// cache + base_offset() + index * 16 is the corresponding ConstantPoolCacheEntry.
+// used registers : T2
+// NOTE : the returned index must still be shifted left by 4 to form the address!
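+//
+// Worked example (illustrative only): with base_offset() == B, index 3 refers
+// to the entry at cache + B + (3 << 4) = cache + B + 48, which is exactly the
+// address formed by the sll/add sequence below.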
+void TemplateTable::resolve_cache_and_index(int byte_no,
+                                            Register Rcache,
+                                            Register index) {
+	assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
+
+	Register temp = T2;
+
+	assert_different_registers(Rcache, index, temp);
+
+	const int shift_count = (1 + byte_no)*BitsPerByte;
+	Label resolved;
+	__ get_cache_and_index_at_bcp(Rcache, index, 1);
+
+	// is resolved?
+	__ sll(AT, index, 4);
+	__ add(AT, Rcache, AT);
+	__ lw(AT, AT, in_bytes(constantPoolCacheOopDesc::base_offset() 
+				+ ConstantPoolCacheEntry::indices_offset()));
+	__ shr(AT, shift_count);
+	__ andi(AT, AT, 0xff);
+	__ addi(AT, AT, - bytecode());
+	__ beq(AT, ZERO, resolved);
+	__ delayed()->nop();
+	// resolve first time through
+	address entry;
+	switch (bytecode()) {
+		case Bytecodes::_getstatic      : // fall through
+		case Bytecodes::_putstatic      : // fall through
+		case Bytecodes::_getfield       : // fall through
+		case Bytecodes::_putfield       : 
+			entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); 
+			break;
+		case Bytecodes::_invokevirtual  : // fall through
+		case Bytecodes::_invokespecial  : // fall through
+		case Bytecodes::_invokestatic   : // fall through
+		case Bytecodes::_invokeinterface: 
+			entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  
+			break;
+		default                      		: 
+			ShouldNotReachHere();
+	}
+	
+	__ move(A1, (int)bytecode());
+	__ call_VM(NOREG, entry, A1);
+
+	// Update registers with resolved info
+	__ get_cache_and_index_at_bcp(Rcache, index, 1);
+	__ bind(resolved);
+}
+
+// The Rcache and index registers must be set before call
+void TemplateTable::load_field_cp_cache_entry(Register obj,
+                                              Register cache,
+                                              Register index,
+                                              Register off,
+                                              Register flags,
+                                              bool is_static = false) {
+	assert_different_registers(cache, index, flags, off);
+	ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
+	// Field offset
+	__ shl(index, 4);
+	__ add(index, cache, index);
+	__ lw(off, index, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
+	// Flags    
+	__ lw(flags, index, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
+
+	// klass (overwrites the obj register)
+	if (is_static) {
+		__ lw(obj, index, in_bytes(cp_base_offset + 
+					ConstantPoolCacheEntry::f1_offset())); 
+		__ verify_oop(obj);	
+	}
+}
+
+// get the method, itable_index and flags of the current invoke
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
+                                               bool is_invokevirtual,
+                                               bool is_invokevfinal /*unused*/) {
+	// setup registers
+	///const Register cache = ecx;
+	///const Register index = edx;
+	const Register cache = T3;
+	const Register index = T4;
+	assert_different_registers(method, flags);
+	assert_different_registers(method, cache, index);
+	assert_different_registers(itable_index, flags);
+	assert_different_registers(itable_index, cache, index);
+	// determine constant pool cache field offsets
+	const int method_offset = in_bytes(
+			constantPoolCacheOopDesc::base_offset() +
+			(is_invokevirtual
+			 ? ConstantPoolCacheEntry::f2_offset()
+			 : ConstantPoolCacheEntry::f1_offset()
+			)
+			);
+	const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+			ConstantPoolCacheEntry::flags_offset());
+	// access constant pool cache fields
+	const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
+			ConstantPoolCacheEntry::f2_offset());
+ 
+	resolve_cache_and_index(byte_no, cache, index);
+
+	assert(wordSize == 4, "adjust code below");
+	// note we shift by 4, not 2: what we have is the true index of the
+	// ConstantPoolCacheEntry, not the 2-bit-shifted index used by the x86 version
+	__ sll(AT, index, 4);
+	__ add(AT, cache, AT);
+	__ lw(method, AT, method_offset);
+	if (itable_index != NOREG) {
+		//__ sll(AT, index, 4);
+		//__ addu(AT, cache, AT);
+		__ lw(itable_index, AT, index_offset);
+	}
+	__ lw(flags, AT, flags_offset);
+}
+
+
+// The registers cache and index expected to be set before call.
+// Correct values of the cache and index registers are preserved.
+void TemplateTable::jvmti_post_field_access(Register cache, Register index,
+                                            bool is_static, bool has_tos) {
+  // do the JVMTI work here to avoid disturbing the register state below
+  // We use c_rarg registers here because we want to use the register used in
+  // the call to the VM
+	if (JvmtiExport::can_post_field_access()) {
+		// Check to see if a field access watch has been set before we take
+		// the time to call into the VM.
+		Label L1;
+		assert_different_registers(cache, index, FSR);
+	//	__ movl(eax, Address((int)JvmtiExport::get_field_access_count_addr(), relocInfo::none));
+		__ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_access_count_addr()));
+		__ lw(FSR, AT, Assembler::split_low((int)JvmtiExport::get_field_access_count_addr()));
+		//	__ testl(eax,eax);
+		//	__ beq(T3, ZERO, L1);
+		__ beq(FSR, ZERO, L1);
+		__ delayed()->nop();
+
+		// We rely on the bytecode being resolved and the cpCache entry filled in.
+		// cache entry pointer
+		__ addi(cache, cache, in_bytes(constantPoolCacheOopDesc::base_offset()));
+		__ shl(index, 4);
+		__ add(cache, cache, index);
+		if (is_static) {
+			__ move(FSR, ZERO);
+		} else {
+			__ lw(FSR, SP, 0);
+			__ verify_oop(FSR);
+		}
+		// FSR: object pointer or NULL
+		// cache: cache entry pointer
+		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+					InterpreterRuntime::post_field_access), FSR, cache);
+		__ get_cache_and_index_at_bcp(cache, index, 1);
+		__ bind(L1);
+	} 
+}
+
+void TemplateTable::pop_and_check_object(Register r) {
+  __ pop_ptr(r);
+  __ null_check(r);  // for field access must check obj.
+  __ verify_oop(r);
+}
+
+// used registers : T1, T2, T3, T4
+// T1 : flags
+// T2 : off
+// T3 : obj
+// T4 : field address
+// The flags 31, 30, 29, 28 together build a 4-bit number 0 to 8 with the
+// following mapping to the TosState states:
+// btos: 0
+// ctos: 1
+// stos: 2
+// itos: 3
+// ltos: 4
+// ftos: 5
+// dtos: 6
+// atos: 7
+// vtos: 8
+// see ConstantPoolCacheEntry::set_field for more info
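+//
+// The decode below is, in C-like form:
+//   tos_state = (flags >> ConstantPoolCacheEntry::tosBits) & 0xf;
+// so btos (0) is tested first and the remaining states are compared in turn.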
+void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
+  transition(vtos, vtos);
+
+	// const Register cache = ecx;
+	const Register cache = T3;
+	// const Register index = edx;
+	const Register index = T4;
+
+	const Register obj   = T3;
+	const Register off   = T2;
+	const Register flags = T1;
+	resolve_cache_and_index(byte_no, cache, index);
+	jvmti_post_field_access(cache, index, is_static, false);
+	load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+
+	if (!is_static) pop_and_check_object(obj);
+	__ add(T4, obj, off);
+
+
+	Label Done, notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
+
+	assert(btos == 0, "change code, btos != 0");
+	__ shr(flags, ConstantPoolCacheEntry::tosBits);
+	__ andi(flags, flags, 0xf);
+	__ bne(flags, ZERO, notByte);
+	__ delayed()->nop();
+
+	// btos
+	__ lb(FSR, T4, 0);	
+	__ sw(FSR, SP, - wordSize);
+
+	// Rewrite bytecode to be faster
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - wordSize);
+
+	__ bind(notByte);
+	__ move(AT, itos);
+	__ bne(T1, AT, notInt);
+	__ delayed()->nop();
+
+	// itos
+	__ lw(FSR, T4, 0);
+	__ sw(FSR, SP, - wordSize);
+
+	// Rewrite bytecode to be faster
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_igetfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - wordSize);
+
+	__ bind(notInt);
+	__ move(AT, atos);
+	__ bne(T1, AT, notObj);
+	__ delayed()->nop();
+
+	// atos
+	__ lw(FSR, T4, 0);
+	__ sw(FSR, SP, - wordSize);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_agetfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - wordSize);
+
+	__ bind(notObj);
+	__ move(AT, ctos);
+	__ bne(T1, AT, notChar);
+	__ delayed()->nop();
+
+	// ctos
+	__ lhu(FSR, T4, 0);
+	__ sw(FSR, SP, - wordSize);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - wordSize);
+
+	__ bind(notChar);
+	__ move(AT, stos);
+	__ bne(T1, AT, notShort);
+	__ delayed()->nop();
+
+	// stos
+	__ lh(FSR, T4, 0);
+	__ sw(FSR, SP, - wordSize);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - wordSize);
+
+	__ bind(notShort);
+	__ move(AT, ltos);
+	__ bne(T1, AT, notLong);
+	__ delayed()->nop();
+
+	// FIXME : the load/store should be atomic, but we have no simple way to do this on mips32
+	// ltos
+	__ lw(FSR, T4, 0*wordSize);
+	__ lw(SSR, T4, 1*wordSize);
+	__ sw(FSR, SP, - 2*wordSize);
+	__ sw(SSR, SP, - 1*wordSize);
+
+	// Don't rewrite to _fast_lgetfield for potential volatile case.
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - 2 * wordSize);
+
+	__ bind(notLong);
+	__ move(AT, ftos);
+	__ bne(T1, AT, notFloat);
+	__ delayed()->nop();
+
+	// ftos
+	__ lwc1(FSF, T4, 0);
+	__ swc1(FSF, SP, - wordSize);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - wordSize);
+
+	__ bind(notFloat);
+	__ move(AT, dtos);
+	__ bne(T1, AT, notDouble);
+	__ delayed()->nop();
+
+	// dtos
+	__ lwc1(FSF, T4, 0 * wordSize);
+	__ lwc1(SSF, T4, 1 * wordSize);
+	__ swc1(FSF, SP, - 2 * wordSize);
+	__ swc1(SSF, SP, - 1 * wordSize);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->addi(SP, SP, - 2 * wordSize);
+
+	__ bind(notDouble);
+
+	__ stop("Bad state");
+
+	__ bind(Done);
+	// Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
+ 	//volatile_barrier( );
+      //jerome_for_debug 
+  /*      __ nop(); 
+        __ nop(); 
+        __ nop(); 
+        __ nop(); 
+        __ nop(); 
+        __ nop(); 
+        __ nop(); 
+        __ nop(); 
+        __ nop(); 
+	__ verify_oop(FSR);*/
+}
+
+void TemplateTable::getfield(int byte_no) {
+	getfield_or_static(byte_no, false);
+}
+
+void TemplateTable::getstatic(int byte_no) {
+	getfield_or_static(byte_no, true);
+}
+/*
+// used registers : T1, T2, T3, T4
+// T1 : cache & cp entry
+// T2 : obj
+// T3 : flags & value pointer
+// T4 : index
+// see ConstantPoolCacheEntry::set_field for more info
+void TemplateTable::jvmti_post_field_mod(int byte_no, bool is_static) {
+ */
+
+// The registers cache and index expected to be set before call.
+// The function may destroy various registers, just not the cache and index registers.
+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
+	ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
+
+	if (JvmtiExport::can_post_field_modification()) {
+		// Check to see if a field modification watch has been set before we take
+		// the time to call into the VM.
+		Label L1;
+		assert_different_registers(cache, index, T3);
+
+		__ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_modification_count_addr()));
+		__ lw(FSR, AT, Assembler::split_low((int)JvmtiExport::get_field_modification_count_addr()));
+		__ beq(FSR, ZERO, L1);
+		__ delayed()->nop();
+
+		/* // We rely on the bytecode being resolved and the cpCache entry filled in.
+		   resolve_cache_and_index(byte_no, T1, T4);
+		   */
+		// The cache and index registers have been already set.
+		// This allows us to eliminate this call, but the cache and index
+		// registers then have to be used consistently after this line.
+		// __ get_cache_and_index_at_bcp(eax, edx, 1);
+		__ get_cache_and_index_at_bcp(T1, T4, 1);
+
+		if (is_static) {
+			__ move(T2, ZERO);
+		} else {
+			// Life is harder. The stack holds the value on top, 
+			// followed by the object.
+			// We don't know the size of the value, though; 
+			// it could be one or two words
+			// depending on its type. As a result, we must find 
+			// the type to determine where the object is.
+			Label two_word, valsize_known;
+			__ sll(AT, T4, 4);
+			__ add(AT, T1, AT);
+			__ lw(T3, AT, in_bytes(cp_base_offset 
+						+ ConstantPoolCacheEntry::flags_offset()));
+			__ move(T2, SP);
+			__ shr(T3, ConstantPoolCacheEntry::tosBits);
+
+			// Make sure we don't need to mask T3 for tosBits
+			// after the above shift
+			ConstantPoolCacheEntry::verify_tosBits();
+			__ move(AT, ltos);
+			__ beq(T3, AT, two_word);
+			__ delayed()->nop();
+			__ move(AT, dtos);
+			__ beq(T3, AT, two_word);
+			__ delayed()->nop();
+			__ b(valsize_known);
+			//__ delayed()->addi(T2, T2, wordSize*1);
+			__ delayed()->addi(T2, T2,Interpreter::expr_offset_in_bytes(1) );
+
+			__ bind(two_word);
+			//	__ addi(T2, T2, wordSize*2);
+			__ addi(T2, T2,Interpreter::expr_offset_in_bytes(2));
+
+			__ bind(valsize_known);
+			// setup object pointer
+			__ lw(T2, T2, 0*wordSize);
+		}
+		// cache entry pointer
+		__ addi(T1, T1, in_bytes(cp_base_offset));
+		__ shl(T4, 4);
+		__ addu(T1, T1, T4);
+		// object (tos)
+		__ move(T3, SP);
+		// T2: object pointer set up above (NULL if static)
+		// T1: cache entry pointer
+		// T3: jvalue object on the stack
+		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+				InterpreterRuntime::post_field_modification), T2, T1, T3);
+		__ get_cache_and_index_at_bcp(cache, index, 1);
+		__ bind(L1);
+	}
+}
+
+// used registers : T1, T2, T3, T4
+// T1 : flags
+// T2 : off
+// T3 : obj
+// T4 : volatile bit
+// see ConstantPoolCacheEntry::set_field for more info
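+//
+// The volatile test below is, in C-like form:
+//   is_volatile = flags & (1 << ConstantPoolCacheEntry::volatileField);
+// the result is kept in T4 so it can be re-tested after the store.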
+void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
+	transition(vtos, vtos);
+
+	const Register cache = T3;
+	const Register index = T4;
+	const Register obj   = T3;
+	const Register off   = T2;
+	const Register flags = T1;
+
+	resolve_cache_and_index(byte_no, cache, index);
+	jvmti_post_field_mod(cache, index, is_static);
+	load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+	// Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
+	// volatile_barrier( );
+
+	Label notVolatile, Done;
+	__ move(AT, 1<<ConstantPoolCacheEntry::volatileField);
+	__ andr(T4, T1, AT);
+
+	Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
+
+	assert(btos == 0, "change code, btos != 0");
+	// btos
+	__ shr(T1, ConstantPoolCacheEntry::tosBits);
+	__ andi(T1, T1, 0xf);
+	__ bne(T1, ZERO, notByte);
+	__ delayed()->nop();
+
+//	__ lb(FSR, SP, 0);
+//	__ addi(SP, SP, wordSize);
+	__ pop(btos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	__ sb(FSR, AT, 0);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_bputfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->nop();
+
+	__ bind(notByte);
+	// itos
+	__ move(AT, itos);
+	__ bne(T1, AT, notInt);
+	__ delayed()->nop();
+
+//	__ lw(FSR, SP, 0);
+//	__ addi(SP, SP, wordSize);
+	__ pop(itos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	__ sw(FSR, AT, 0);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_iputfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->nop();  
+	__ bind(notInt);
+	// atos
+	__ move(AT, atos);
+	__ bne(T1, AT, notObj);
+	__ delayed()->nop();
+
+//	__ lw(FSR, SP, 0);
+//	__ addi(SP, SP, wordSize);
+	__ pop(atos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	__ sw(FSR, AT, 0);
+	__ store_check(T3);
+
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_aputfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->nop();
+	__ bind(notObj);
+	// ctos
+	__ move(AT, ctos);
+	__ bne(T1, AT, notChar);
+	__ delayed()->nop();
+
+//	__ lhu(FSR, SP, 0);
+//	__ addi(SP, SP, wordSize);
+	__ pop(ctos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	__ sh(FSR, AT, 0);
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_cputfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->nop();
+	__ bind(notChar);
+	// stos
+	__ move(AT, stos);
+	__ bne(T1, AT, notShort);
+	__ delayed()->nop();
+
+//	__ lh(FSR, SP, 0);
+//	__ addi(SP, SP, wordSize);
+	__ pop(stos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	__ sh(FSR, AT, 0);
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_sputfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->nop();
+	__ bind(notShort);
+	// ltos
+	__ move(AT, ltos);
+	__ bne(T1, AT, notLong);
+	__ delayed()->nop();
+
+	// FIXME: there is no simple way to load/store 64-bit data in an atomic operation,
+	// so we just ignore the volatile flag.
+	//Label notVolatileLong;
+	//__ beq(T4, ZERO, notVolatileLong);
+	//__ delayed()->nop();
+
+	//addent = 2 * wordSize;
+	// no need
+	//__ lw(FSR, SP, 0);
+	//__ lw(SSR, SP, 1 * wordSize);
+	//if (!is_static) {
+	//	__ lw(T3, SP, addent);
+	//	addent += 1 * wordSize;
+	//	__ verify_oop(T3);
+	//}
+
+	//__ addu(AT, T3, T2);
+
+	// Replace with real volatile test
+	// NOTE : we assume that sdc1&ldc1 operate in 32-bit, this is true for Godson2 even in 64-bit kernel
+	// last modified by yjl 7/12/2005
+	//__ ldc1(FSF, SP, 0); 
+	//__ sdc1(FSF, AT, 0);
+	//volatile_barrier();
+
+	// Don't rewrite volatile version
+	//__ b(notVolatile);
+	//__ delayed()->addiu(SP, SP, addent);
+
+	//__ bind(notVolatileLong);
+
+	//__ pop(ltos);  // overwrites edx
+//	__ lw(FSR, SP, 0 * wordSize);
+//	__ lw(SSR, SP, 1 * wordSize);
+//	__ addi(SP, SP, 2*wordSize);
+	__ pop(ltos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	__ sw(FSR, AT, 0);
+	__ sw(SSR, AT, 4);
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_lputfield, T3, T2);
+	}
+	__ b(notVolatile);
+	__ delayed()->nop();
+
+	__ bind(notLong);
+	// ftos
+	__ move(AT, ftos);
+	__ bne(T1, AT, notFloat);
+	__ delayed()->nop();
+
+//	__ lwc1(FSF, SP, 0);
+//	__ addi(SP, SP, wordSize);
+	__ pop(ftos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	__ swc1(FSF, AT, 0);
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_fputfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->nop();
+	__ bind(notFloat);
+	// dtos
+	__ move(AT, dtos);
+	__ bne(T1, AT, notDouble);
+	__ delayed()->nop();
+
+	//__ ldc1(FSF, SP, 0);
+//	__ lwc1(FSF, SP, 0);
+//	__ lwc1(SSF, SP, wordSize);
+//	__ addi(SP, SP, 2*wordSize);
+	__ pop(dtos);
+	if (!is_static) {
+		//		__ lw(T3, SP, addent);
+		//		addent += 1 * wordSize;
+		//		__ verify_oop(T3);
+		pop_and_check_object(T3); 
+	}
+	__ add(AT, T3, T2);
+	//__ sdc1(F12, AT, 0);
+	__ swc1(FSF, AT, 0);
+	__ swc1(SSF, AT, wordSize);
+	if (!is_static) {
+		patch_bytecode(Bytecodes::_fast_dputfield, T3, T2);
+	}
+	__ b(Done);
+	__ delayed()->nop();
+	__ bind(notDouble);
+
+	__ stop("Bad state");
+
+	__ bind(Done);
+
+	// Check for volatile store
+	__ beq(T4, ZERO, notVolatile);
+	__ delayed()->nop();
+	volatile_barrier( );
+	__ bind(notVolatile);
+}
+
+void TemplateTable::putfield(int byte_no) {
+	putfield_or_static(byte_no, false);
+}
+
+void TemplateTable::putstatic(int byte_no) {
+	putfield_or_static(byte_no, true);
+}
+
+// used registers : T1, T2, T3
+// T1 : cp_entry
+// T2 : obj
+// T3 : value pointer
+void TemplateTable::jvmti_post_fast_field_mod() {
+	if (JvmtiExport::can_post_field_modification()) {
+		// Check to see if a field modification watch has been set before we take
+		// the time to call into the VM.
+		Label L2;
+		__ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_modification_count_addr()));
+		__ lw(T3, AT, Assembler::split_low((int)JvmtiExport::get_field_modification_count_addr()));
+		__ beq(T3, ZERO, L2);
+		__ delayed()->nop();
+		//__ pop(T2);
+		__ pop_ptr(T2);
+		//__ lw(T2, SP, 0);
+		__ verify_oop(T2);
+		__ push_ptr(T2);	
+		__ addiu(SP, SP, -sizeof(jvalue));
+		__ move(T3, SP);
+		//__ push(T2);
+		//__ move(T2, ZERO);
+
+		switch (bytecode()) {          // load values into the jvalue object
+			case Bytecodes::_fast_bputfield: 
+				__ sb(FSR, SP, 0); 
+
+				break;
+			case Bytecodes::_fast_sputfield: 
+				__ sh(FSR, SP, 0);
+				break;
+			case Bytecodes::_fast_cputfield: 
+				__ sh(FSR, SP, 0);
+				break;
+			case Bytecodes::_fast_iputfield: 
+				__ sw(FSR, SP, 0);
+				break;							 
+			case Bytecodes::_fast_lputfield: 
+				__ sw(FSR, SP, 0);
+				__ sw(SSR, SP, 4);
+				break;
+			case Bytecodes::_fast_fputfield: 
+				__ swc1(FSF, SP, 0);
+				break;
+			case Bytecodes::_fast_dputfield: 
+				__ swc1(FSF, SP, 0);
+				__ swc1(SSF, SP, 4);
+				break;
+			case Bytecodes::_fast_aputfield: 
+				__ sw(FSR, SP, 0);
+				break;
+			default:  ShouldNotReachHere();
+		}
+
+		//__ pop(T2);  // restore copy of object pointer
+
+		// Save FSR and sometimes SSR because call_VM() will clobber them,
+		// then use them for JVM/DI purposes
+		__ push(FSR);
+		if (bytecode() == Bytecodes::_fast_lputfield) __ push(SSR);
+		// access constant pool cache entry
+		__ get_cache_entry_pointer_at_bcp(T1, T4, 1);
+		// no need, verified ahead
+		__ verify_oop(T2);
+
+		// T2: object pointer copied above
+		// T1: cache entry pointer
+		// T3: jvalue object on the stack
+		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+					InterpreterRuntime::post_field_modification), T2, T1, T3);
+		if (bytecode() == Bytecodes::_fast_lputfield) __ pop(SSR);  // restore high value
+		//__ pop(FSR);     // restore lower value   
+		//__ addi(SP, SP, sizeof(jvalue));  // release jvalue object space
+		__ lw(FSR, SP, 0);
+		__ addiu(SP, SP, sizeof(jvalue) + 1 * wordSize);
+		__ bind(L2);
+	}
+}
+
+// used registers : T2, T3, T4
+// T2 : index & off & field address
+// T3 : cache & obj
+// T4 : flags
+void TemplateTable::fast_storefield(TosState state) {
+	transition(state, vtos);
+
+	ByteSize base = constantPoolCacheOopDesc::base_offset();
+
+	jvmti_post_fast_field_mod();
+
+	// access constant pool cache
+	__ get_cache_and_index_at_bcp(T3, T2, 1);
+
+	// test for the volatile flag with T4; the tos value itself stays in FSR/SSR
+	__ sll(AT, T2, 4);
+	__ add(AT, T3, AT);
+	__ lw(T4, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset()));
+
+	// replace index with field offset from cache entry
+	__ lw(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset()));
+
+	// Doug Lea believes this is not needed with current Sparcs (TSO) and Intel (PSO).
+	// volatile_barrier( );
+
+	Label notVolatile, Done;
+	// Check for volatile store
+	__ move(AT, 1<<ConstantPoolCacheEntry::volatileField);
+	__ andr(AT, T4, AT);
+	__ beq(AT, ZERO, notVolatile);
+	__ delayed()->nop();
+
+
+	// Get object from stack
+	// NOTE : the value in FSR/FSF now
+	//	__ pop(T3);
+	//	__ verify_oop(T3);
+	pop_and_check_object(T3);
+	// field addresses
+	__ add(T2, T3, T2);
+
+	// access field
+	switch (bytecode()) {
+		case Bytecodes::_fast_bputfield: 
+			__ sb(FSR, T2, 0);
+			break;
+		case Bytecodes::_fast_sputfield: // fall through
+		case Bytecodes::_fast_cputfield: 
+			__ sh(FSR, T2, 0);
+			break;
+		case Bytecodes::_fast_iputfield: 
+			__ sw(FSR, T2, 0);
+			break;
+		case Bytecodes::_fast_lputfield: 
+			__ sw(FSR, T2, 0 * wordSize);
+			__ sw(SSR, T2, 1 * wordSize);
+			break;
+		case Bytecodes::_fast_fputfield: 
+			__ swc1(FSF, T2, 0);
+			break;
+		case Bytecodes::_fast_dputfield: 
+			__ swc1(FSF, T2, 0 * wordSize);
+			__ swc1(SSF, T2, 1 * wordSize);
+			break;
+		case Bytecodes::_fast_aputfield: 
+			__ sw(FSR, T2, 0);
+			__ store_check(T3);
+			break;
+		default:
+			ShouldNotReachHere();
+	}
+
+	Label done;
+	volatile_barrier( );
+	__ b(done);
+	__ delayed()->nop();
+
+	// Same code as above, but no need to test T4 for volatile.
+	__ bind(notVolatile);
+
+	// Get object from stack
+	//	__ pop(T3);
+	//	__ verify_oop(T3);
+	pop_and_check_object(T3);
+	//get the field address
+	__ add(T2, T3, T2);
+
+	// access field
+	switch (bytecode()) {
+		case Bytecodes::_fast_bputfield: 
+			__ sb(FSR, T2, 0); 
+			break;
+		case Bytecodes::_fast_sputfield: // fall through
+		case Bytecodes::_fast_cputfield: 
+			__ sh(FSR, T2, 0);
+			break;
+		case Bytecodes::_fast_iputfield: 
+			__ sw(FSR, T2, 0);
+			break;
+		case Bytecodes::_fast_lputfield: 
+			__ sw(FSR, T2, 0 * wordSize);
+			__ sw(SSR, T2, 1 * wordSize);
+			break;
+		case Bytecodes::_fast_fputfield: 
+			__ swc1(FSF, T2, 0);
+			break;
+		case Bytecodes::_fast_dputfield: 
+			__ swc1(FSF, T2, 0 * wordSize);
+			__ swc1(SSF, T2, 1 * wordSize);
+			break;
+		case Bytecodes::_fast_aputfield: 
+			__ sw(FSR, T2, 0);
+			__ store_check(T3);
+			break;
+		default:
+			ShouldNotReachHere();
+	}
+	__ bind(done);
+}
+
+// used registers : T2, T3, T4
+// T3 : cp_entry & cache
+// T2 : index & offset
+void TemplateTable::fast_accessfield(TosState state) {
+	transition(atos, state);
+
+	// do the JVMTI work here to avoid disturbing the register state below
+	if (JvmtiExport::can_post_field_access()) {
+		// Check to see if a field access watch has been set before we take
+		// the time to call into the VM.
+		Label L1;
+		__ lui(AT, Assembler::split_high((int)JvmtiExport::get_field_access_count_addr()));
+		__ lw(T3, AT, Assembler::split_low((int)JvmtiExport::get_field_access_count_addr()));
+		__ beq(T3, ZERO, L1);
+		__ delayed()->nop();
+		// access constant pool cache entry
+		__ get_cache_entry_pointer_at_bcp(T3, T4, 1);
+		__ move(TSR, FSR);
+		__ verify_oop(FSR);
+		// FSR: object pointer copied above
+		// T3: cache entry pointer
+		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
+				FSR, T3);
+		__ move(FSR, TSR);
+		__ bind(L1);
+	}
+
+	// access constant pool cache
+	__ get_cache_and_index_at_bcp(T3, T2, 1);
+	// replace index with field offset from cache entry
+	__ sll(AT, T2, 4);
+	__ add(AT, T3, AT);
+	__ lw(T2, AT, in_bytes(constantPoolCacheOopDesc::base_offset() 
+				+ ConstantPoolCacheEntry::f2_offset()));
+
+	// FSR: object
+	__ verify_oop(FSR);
+	// __ null_check(FSR, 0);
+	__ null_check(FSR);
+	// field addresses
+	__ add(FSR, FSR, T2);
+
+	// access field
+	switch (bytecode()) {
+		case Bytecodes::_fast_bgetfield: 
+			__ lb(FSR, FSR, 0);
+			break;
+		case Bytecodes::_fast_sgetfield: 
+			__ lh(FSR, FSR, 0);
+			break;
+		case Bytecodes::_fast_cgetfield: 
+			__ lhu(FSR, FSR, 0);
+			break;
+		case Bytecodes::_fast_igetfield: 
+			__ lw(FSR, FSR, 0);
+			break;
+		case Bytecodes::_fast_lgetfield: 
+			__ stop("should not be rewritten");  
+			break;
+		case Bytecodes::_fast_fgetfield: 
+			__ lwc1(FSF, FSR, 0);
+			break;
+		case Bytecodes::_fast_dgetfield: 
+			__ lwc1(FSF, FSR, 0);
+			__ lwc1(SSF, FSR, 4);
+			break;
+		case Bytecodes::_fast_agetfield: 
+			__ lw(FSR, FSR, 0);
+			__ verify_oop(FSR);
+			break;
+		default:
+			ShouldNotReachHere();
+	}
+
+	// Doug Lea believes this is not needed with current Sparcs(TSO) and Intel(PSO)
+	// volatile_barrier( );
+}
+
+// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0
+// used registers : T1, T2, T3, T4
+// T1 : obj & field address
+// T2 : off
+// T3 : cache
+// T4 : index
+void TemplateTable::fast_xaccess(TosState state) {
+	transition(vtos, state);
+	// get receiver
+	__ lw(T1, aaddress(0));
+	debug_only(__ verify_local_tag(frame::TagReference, 0));
+	// access constant pool cache
+	__ get_cache_and_index_at_bcp(T3, T4, 2);
+	__ sll(AT, T4, 4);
+	__ add(AT, T3, AT);
+	__ lw(T2, AT, in_bytes(constantPoolCacheOopDesc::base_offset() 
+				+ ConstantPoolCacheEntry::f2_offset()));
+
+	// make sure exception is reported in correct bcp range (getfield is next instruction)
+	__ addi(BCP, BCP, 1);
+	//	__ null_check(T1, 0);
+	__ null_check(T1);
+	__ add(T1, T1, T2);
+
+	if (state == itos) {
+		__ lw(FSR, T1, 0);
+	} else if (state == atos) {
+		__ lw(FSR, T1, 0);
+		__ verify_oop(FSR);
+	} else if (state == ftos) {
+		__ lwc1(FSF, T1, 0);
+	} else {
+		ShouldNotReachHere();
+	}
+	__ addi(BCP, BCP, -1);
+}
+
+//---------------------------------------------------
+//-------------------------------------------------
+// Calls
+
+void TemplateTable::count_calls(Register method, Register temp) {  
+	// implemented elsewhere
+	ShouldNotReachHere();
+}
+
+// method, index, recv, flags: T1, T2, T3, T4
+// byte_no = 2 for _invokevirtual, 1 else
+// T0 : return address
+// get the method & index of the invoke, and push the return address of 
+// the invoke(first word in the frame)
+// this address is where the return code jmp to.
+// NOTE : this method will set T3&T4 as recv&flags
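+//
+// Receiver lookup sketch (assuming, as the andi below does, that the low byte
+// of the cache-entry flags holds the parameter size in stack elements):
+//   recv = *(SP + ((flags & 0xff) << stackElementScale) - expr_offset_in_bytes(1));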
+void TemplateTable::prepare_invoke(Register method, Register index, 
+		                  int byte_no, Bytecodes::Code code) {
+	// determine flags
+	const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+	const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
+	const bool is_invokespecial    = code == Bytecodes::_invokespecial;
+	const bool load_receiver       = code != Bytecodes::_invokestatic;
+	const bool receiver_null_check = is_invokespecial;
+	// const bool save_flags = is_invokeinterface || is_invokevirtual;
+	// setup registers & access constant pool cache
+	const Register recv   = T3;
+	const Register flags  = T4;
+
+	assert_different_registers(method, index, recv, flags);
+
+	// save 'interpreter return address'
+	__ save_bcp();
+
+	load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual);
+
+	// load receiver if needed (note: no return address pushed yet)
+	if (load_receiver) {
+		__ andi(AT, flags, 0xff);
+		__ shl(AT, Interpreter::stackElementScale());
+		__ add(AT, SP, AT);
+		//__ move(T8, AT);	
+		__ lw(recv, AT, - Interpreter::expr_offset_in_bytes(1));
+                __ verify_oop(recv);	
+	}
+/*	
+	if (load_receiver) {
+	Label mmm;
+	__ move(AT, 0xf0000000);	
+	__ andr(AT, AT, recv);	
+	__ srl(AT, AT, 28);	
+	__ addi(AT, AT, -1);	
+	__ bne(AT, ZERO, mmm);	
+	__ delayed()->nop();	
+//	__ move(AT,  (int)&jerome6);	
+//	__ lw(AT, AT, 0);	
+//	__ beq(AT, ZERO, mmm);	
+//	__ delayed()->nop();	
+	__ move(AT, (int)&jerome1 );
+	__ sw(SP, AT, 0); 	
+	__ move(AT, (int)&jerome2 );
+	__ sw(FP, AT, 0); 	
+	__ move(AT, (int)&jerome3 );
+	__ sw(BCP, AT, 0); 	
+	__ move(AT, (int)&jerome4 );
+	__ sw(recv, AT, 0); 	
+	__ move(AT, (int)&jerome5 );
+	__ sw(V0, AT, 0); 	
+
+
+	__ move(AT, (int)&jerome6 );
+	__ lw(flags, T8, -4);	
+	__ sw(flags , AT, 0);
+	__ move(AT, (int)&jerome7 );
+	__ lw(flags, T8, 0);	
+	__ sw(flags , AT, 0);
+	
+	__ move(AT, (int)&jerome8 );
+	__ lw(flags, T8, 4);	
+	__ sw(flags , AT, 0);
+	
+	__ move(AT, (int)&jerome9 );
+	__ lw(flags, recv, oopDesc::klass_offset_in_bytes());
+	__ sw(flags , AT, 0);
+	__ move(AT, (int)&jerome10 );
+	__ lbu(flags, BCP, -1);	
+	__ sw(flags , AT, 0);
+
+
+	__ move(AT, (int)&jerome5 );
+	__ lw(flags, AT, 0); 	
+
+
+	__ pushad();
+//	__ enter();
+	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				relocInfo::runtime_call_type);
+	__ delayed()->nop();
+//	__ leave();
+	__ popad();
+      
+	__ bind(mmm);
+	}
+*/	
+	// do null check if needed
+	if (receiver_null_check) {
+		__ null_check(recv);
+	}
+        //FIXME, why not save flags here?
+	// compute return type
+	__ srl(T0, flags, ConstantPoolCacheEntry::tosBits);
+
+	// Make sure we don't need to mask flags for tosBits after the above shift
+	ConstantPoolCacheEntry::verify_tosBits();
+	// load return address
+	{ 
+		const int table =
+			is_invokeinterface
+			? (int)Interpreter::return_5_addrs_by_index_table()
+			: (int)Interpreter::return_3_addrs_by_index_table();
+		__ lui(AT, Assembler::split_high(table));
+		__ shl(T0, 2);
+		__ add(AT, AT, T0);
+		__ lw(RA, AT, Assembler::split_low(table));
+	}
+
+  // push return address, see generate_fixed_frame for more info
+//		__ push(T0);
+}
+
+// used registers : T0, T3, T4, T7, T9
+// T9 : entry
+// T3 : recv, this two register using convention is by prepare_invoke
+// T4 : flags, klass
+// T7 : method, index must be T7
+void TemplateTable::invokevirtual_helper(Register index, Register recv,
+		Register flags) {
+
+	assert_different_registers(index, recv, T1, T4);
+
+	// Test for an invoke of a final method
+	Label notFinal;
+	__ move(AT, (1 << ConstantPoolCacheEntry::vfinalMethod));
+	__ andr(AT, flags, AT);
+	__ beq(AT, ZERO, notFinal);
+	__ delayed()->nop();
+
+	Register method = index;  // method must be T7
+	assert(method == T7, "methodOop must be T7 for interpreter calling convention");
+
+	// do the call - the index is actually the method to call
+	// the index really is a methodOop here, because this is a vfinal call;
+	// see ConstantPoolCacheEntry::set_method for more info
+
+	__ verify_oop(method);
+
+	// It's final, need a null check here!
+//jerome_for_debug
+	__ null_check(recv);
+
+	// profile this call
+	__ profile_final_call(T0);
+
+	//__ lw(T9, method, in_bytes(methodOopDesc::interpreter_entry_offset()));
+	//__ jr(T9);
+	//__ delayed();
+	__ move(T0, recv);
+	__ jump_from_interpreted(method, T4);
+
+	__ bind(notFinal);
+
+	// get receiver klass
+	__ null_check(recv, oopDesc::klass_offset_in_bytes());
+	// keep recv live - the callee expects the receiver (it is moved into T0 below)
+	__ lw(T4, recv, oopDesc::klass_offset_in_bytes());
+	__ verify_oop(T4);
+//jerome10
+/* 
+	Label nnn;
+	__ move(AT, 0x80000000);	
+	__ andr(AT, AT, T4);	
+	__ beq(AT, ZERO, nnn);	
+	__ delayed()->nop();	
+
+	__ move(AT, (int)&jerome10 );
+	__ sw(ZERO, AT, 0);
+
+	__ move(AT, (int)&jerome1 );
+	__ sw(recv, AT, 0); 	
+	__ move(AT, (int)&jerome2 );
+	__ sw(T4, AT, 0); 	
+	__ move(AT, (int)&jerome3 );
+//	__ get_thread(T4);	
+	__ sw(RA, AT, 0); 	
+	__ move(AT, (int)&jerome4 );
+	__ sw(SP, AT, 0); 	
+
+	__ move(AT, (int)&jerome5 );
+	__ sw(FP, AT, 0); 	
+
+	__ move(AT, (int)&jerome6 );
+	__ sw(ZERO, AT, 0); 	
+
+	__ move(AT, (int)&jerome7 );
+	__ sw(ZERO, AT, 0); 	
+
+	__ move(AT, (int)&jerome8 );
+	__ sw(ZERO, AT, 0); 	
+	
+	__ move(AT, (int)&jerome9 );
+	__ sw(ZERO, AT, 0);
+//	__ move(AT, (int)&jerome2 );
+//	__ lw(T4, AT, 0);
+
+	__ pushad();
+//	__ enter();
+	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				relocInfo::runtime_call_type);
+	__ delayed()->nop();
+//	__ leave();
+	__ popad();
+      
+	
+	__ bind(nnn);
+*/
+	// profile this call
+	__ profile_virtual_call(T1, T0, T4);
+
+	// get target methodOop & entry point
+	const int base = instanceKlass::vtable_start_offset() * wordSize;    
+	assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
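+	// i.e. method = *(recv_klass + vtable_start + index * 4 + method_offset_in_bytes),
+	// which is what the sll/add/lw sequence below computes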
+	__ sll(AT, index, 2);
+	__ add(AT, T4, AT);
+	
+	// this is an unaligned read
+	__ lw(method,AT,base+vtableEntry::method_offset_in_bytes());
+       
+//	__ lhu(method, AT, base+vtableEntry::method_offset_in_bytes()+2); 
+ //       __ lhu(T4, AT, base+vtableEntry::method_offset_in_bytes()); 	
+  //      __ sll(method, method, 16);	
+   //     __ addu(method, method, T4);	
+	__ move(T0, recv);
+	__ jump_from_interpreted(method, T4);
+	
+}
+
+void TemplateTable::invokevirtual(int byte_no) {
+	transition(vtos, vtos);
+	prepare_invoke(T7, NOREG, byte_no, bytecode());
+	// now recv & flags in T3, T4
+
+	invokevirtual_helper(T7, T3, T4);
+}
+
+// used registers : T9, T7
+// T9 : entry
+// T7 : method
+void TemplateTable::invokespecial(int byte_no) {
+	transition(vtos, vtos);
+	// prepare_invoke(method, index, byte_no, bytecode());
+	prepare_invoke(T7, NOREG, byte_no, bytecode());
+	// do the call
+	// now recv & flags in T3, T4
+	__ verify_oop(T7);
+	__ profile_call(T9);
+	__ jump_from_interpreted(T7, T9);
+	__ move(T0, T3);
+}
+
+void TemplateTable::invokestatic(int byte_no) {
+	transition(vtos, vtos);
+	prepare_invoke(T7, NOREG, byte_no, bytecode());
+	__ verify_oop(T7);
+	__ profile_call(T9);
+	__ jump_from_interpreted(T7, T9);
+}
+
+// I have no idea what to do here for now; to be revisited. FIXME.
+void TemplateTable::fast_invokevfinal(int byte_no) {
+	transition(vtos, vtos);
+	__ stop("fast_invokevfinal not used on x86");
+}
+
+// used registers : T0, T1, T2, T3, T4, T7
+// T0 : itable, vtable, entry
+// T1 : interface
+// T3 : receiver
+// T4 : flags, klass
+// T7 : index, method, this is required by interpreter_entry
+void TemplateTable::invokeinterface(int byte_no) {
+	transition(vtos, vtos);
+	//this method will use T1-T4 and T0
+	prepare_invoke(T1, T7, byte_no, bytecode());
+	// T1: Interface
+	// T2: index
+	// T3: receiver    
+	// T4: flags
+       Label notMethod;
+	__ move(AT, (1 << ConstantPoolCacheEntry::methodInterface));
+	__ andr(AT, T4, AT);
+	__ beq(AT, ZERO, notMethod);
+	__ delayed()->nop();
+
+	// Special case of invokeinterface called for virtual method of
+	// java.lang.Object.  See cpCacheOop.cpp for details.
+	// This code isn't produced by javac, but could be produced by
+	// another compliant java compiler.
+	invokevirtual_helper(T7, T3, T4);
+
+	__ bind(notMethod);
+	// Get receiver klass into T4 - also a null check
+	__ lw(T4, T3, oopDesc::klass_offset_in_bytes());
+	__ verify_oop(T4);
+
+	// profile this call
+	__ profile_virtual_call(T4, T0, FSR);
+
+	// Compute start of first itableOffsetEntry (which is at the end of the vtable)
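+	// Itable layout sketch (standard HotSpot layout, stated here for reference):
+	// after the vtable (vtable_length entries of 4 bytes each) comes an array of
+	// itableOffsetEntry { interface, offset } terminated by a null interface;
+	// 'offset' locates that interface's itableMethodEntry array within the klass.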
+	const int base = instanceKlass::vtable_start_offset() * wordSize;    
+	assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
+	__ lw(AT, T4, instanceKlass::vtable_length_offset() * wordSize); 
+	__ shl(AT, 2);
+	__ add(T0, T4, AT);
+	__ addi(T0, T0, base);
+	if (HeapWordsPerLong > 1) {
+		// Round up to align_object_offset boundary
+		__ round_to(T0, BytesPerLong);
+	}
+	// now T0 is the begin of the itable
+
+	Label entry, search, interface_ok;
+
+	///__ jmp(entry);   
+	__ b(entry);
+	__ delayed()->nop();
+
+	__ bind(search);
+	__ increment(T0, itableOffsetEntry::size() * wordSize);
+
+	__ bind(entry);
+
+	// Check that the entry is non-null.  A null entry means that the receiver
+	// class doesn't implement the interface, and wasn't the same as the
+	// receiver class checked when the interface was resolved.
+	__ lw(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
+	__ bne(AT, ZERO, interface_ok);
+	__ delayed()->nop();
+	// throw exception
+	// the call_VM checks for exception, so we should never return here.
+
+	//__ pop();//FIXME here,			
+	// pop return address (pushed by prepare_invoke). 
+	// no need now, we just save the value in RA now
+
+	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
+	__ should_not_reach_here();
+
+	__ bind(interface_ok);
+	// NOTICE: no pop here, unlike the x86 version
+	//__ lw(AT, T0, itableOffsetEntry::interface_offset_in_bytes());
+	__ bne(AT, T1, search);
+	__ delayed()->nop();
+
+	// now we get vtable of the interface
+	__ lw(T0, T0, itableOffsetEntry::offset_offset_in_bytes());
+	__ addu(T0, T4, T0);
+	assert(itableMethodEntry::size() * wordSize == 4, "adjust the scaling in the code below");
+	__ sll(AT, T7, 2);
+	__ addu(AT, T0, AT);
+	// now we get the method
+	__ lw(T7, AT, 0);
+	// T7: methodOop to call
+	// T3: receiver
+	// Check for abstract method error
+	// Note: This should be done more efficiently via a throw_abstract_method_error
+	//       interpreter entry point and a conditional jump to it in case of a null
+	//       method.
+	{ 
+		Label L;
+		///__ testl(ebx, ebx);
+		///__ jcc(Assembler::notZero, L);
+		__ bne(T7, ZERO, L);
+		__ delayed()->nop();
+		// throw exception
+		// note: must restore interpreter registers to canonical
+		//       state for exception handling to work correctly!
+		///__ popl(ebx);          // pop return address (pushed by prepare_invoke)
+		//__ restore_bcp();      // esi must be correct for exception handler   
+		//(was destroyed)
+		//__ restore_locals();   // make sure locals pointer 
+		//is correct as well (was destroyed)
+		///__ call_VM(noreg, CAST_FROM_FN_PTR(address, 
+		//InterpreterRuntime::throw_AbstractMethodError));
+		__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+		// the call_VM checks for exception, so we should never return here.
+		__ should_not_reach_here();
+		__ bind(L);
+	}
+	__ jump_from_interpreted(T7, T9);
+}
+
+//----------------------------------------------------------------------------------------------------
+// Allocation
+// T1 : tags & buffer end & thread
+// T2 : object end
+// T3 : klass
+// T4 : object size
+// A1 : cpool
+// A2 : cp index
+// return object in FSR
+void TemplateTable::_new() {
+	transition(vtos, atos);
+	__ load_two_bytes_from_at_bcp(A2, AT, 1);
+	__ huswap(A2);
+
+	Label slow_case;
+	Label done;
+	Label initialize_header;
+	Label initialize_object;  // including clearing the fields
+	Label allocate_shared;
+
+	// get instanceKlass in T3
+	__ get_cpool_and_tags(A1, T1);
+	__ sll(AT, A2, 2);
+	__ add(AT, A1, AT);
+	__ lw(T3, AT, sizeof(constantPoolOopDesc));
+
+	// make sure the class we're about to instantiate has been resolved. 
+	// Note: in the x86 version slow_case pops the stack, which is why class was loaded/pushed there
+	const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
+	__ add(T1, T1, A2);
+	__ lb(AT, T1, tags_offset);
+	//__ addiu(AT, AT, - (int)JVM_CONSTANT_UnresolvedClass);
+	__ addiu(AT, AT, - (int)JVM_CONSTANT_Class);
+	//__ beq(AT, ZERO, slow_case);
+	__ bne(AT, ZERO, slow_case);
+	__ delayed()->nop();
+
+	/*make sure klass is initialized & doesn't have finalizer*/
+
+	// make sure klass is fully initialized
+	__ lw(T1, T3, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
+	__ addiu(AT, T1, - (int)instanceKlass::fully_initialized);
+	__ bne(AT, ZERO, slow_case);
+	__ delayed()->nop();
+
+	// has_finalizer
+	//__ lw(T1, T3, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+	//__ move(AT, JVM_ACC_CAN_BE_FASTPATH_ALLOCATED);
+	//__ andr(AT, T1, AT);
+	//FIXME need confirmation and test. aoqi
+	__ lw(T1, T3, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+	__ andi(AT, T1, Klass::_lh_instance_slow_path_bit);
+	__ beq(AT, ZERO, slow_case);
+	__ delayed()->nop();
+
+	// get instance_size in instanceKlass (already aligned) in T4, 
+	// be sure to preserve this value 
+	//__ lw(T4, T3, Klass::size_helper_offset_in_bytes() + sizeof(oopDesc));
+	//Klass::_size_helper is renamed Klass::_layout_helper. aoqi 
+	__ lw(T4, T3, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+
+	// 
+	// Allocate the instance
+	// 1) Try to allocate in the TLAB
+	// 2) if fail and the object is large allocate in the shared Eden
+	// 3) if the above fails (or is not applicable), go to a slow case
+	// (creates a new TLAB, etc.)
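+	//
+	// The TLAB bump-pointer step (1), in C-like form (T4 holds the instance
+	// size in words, wordSize == 4):
+	//   new_top = tlab_top + size_in_words * 4;
+	//   if (new_top > tlab_end) goto shared_eden_or_slow_case;
+	//   tlab_top = new_top;    // object starts at the old tlab_top (FSR)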
+
+	const bool allow_shared_alloc =
+		Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
+
+	if (UseTLAB) {
+#ifndef OPT_THREAD
+		const Register thread = T1;
+		__ get_thread(thread);
+#else
+		const Register thread = TREG;
+#endif
+		// get tlab_top
+		__ lw(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
+		__ sll(AT, T4, 2);
+		__ add(T2, FSR, AT);
+		// get tlab_end
+		__ lw(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
+		__ slt(AT, AT, T2);
+		//		__ bne(AT, ZERO, allocate_shared);
+		__ bne(AT, ZERO, allow_shared_alloc ? allocate_shared : slow_case);
+		__ delayed()->nop();
+		__ sw(T2, thread, in_bytes(JavaThread::tlab_top_offset()));
+
+		if (ZeroTLAB) {
+			// the fields have been already cleared
+			__ b(initialize_header);
+		} else {
+			// initialize both the header and fields
+			__ b(initialize_object);
+		}
+		__ delayed()->nop();
+		/*
+
+		   if (CMSIncrementalMode) {
+		// No allocation in shared eden. 
+		///__ jmp(slow_case);
+		__ b(slow_case);
+		__ delayed()->nop();
+		}
+		*/ 
+	}
+
+	// Allocation in the shared Eden , if allowed
+	// T4 : instance size in words
+	if(allow_shared_alloc){ 
+		__ bind(allocate_shared);
+		Label retry;
+		Address heap_top(T1, Assembler::split_low((int)Universe::heap()->top_addr()));
+		__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
+
+		__ lw(FSR, heap_top);
+		__ bind(retry);
+		__ sll(AT, T4, 2);
+		__ add(T2, FSR, AT);
+		__ lui(AT, Assembler::split_high((int)Universe::heap()->end_addr()));
+		__ lw(AT, AT, Assembler::split_low((int)Universe::heap()->end_addr()));
+		__ slt(AT, AT, T2);
+		__ bne(AT, ZERO, slow_case);
+		__ delayed()->nop();
+
+		// Compare FSR with the top addr, and if still equal, store the new
+		// top addr (T2) at the top addr pointer. On MIPS the cmpxchg below is
+		// built from ll/sc; AT is zero if another thread beat us to the allocation.
+		//
+		// FSR: object begin
+		// T2: object end
+		// T4: instance size in words
+
+		// if someone beat us on the allocation, try again, otherwise continue 
+		//__ lui(T1, Assembler::split_high((int)Universe::heap()->top_addr()));
+		__ cmpxchg(T2, heap_top, FSR);
+		__ beq(AT, ZERO, retry);
+		__ delayed()->nop();
+	}
+
+	if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
+		// The object is initialized before the header.  If the object size is
+		// zero, go directly to the header initialization.
+		__ bind(initialize_object);
+		__ addiu(T4, T4, - sizeof(oopDesc) / oopSize);
+		__ beq(T4, ZERO, initialize_header);
+		__ delayed()->nop();
+
+
+		// T4 must be a multiple of 2
+#ifdef ASSERT
+		// make sure T4 is a multiple of 2
+		Label L;
+		__ andi(AT, T4, 1);
+		__ beq(AT, ZERO, L);
+		__ delayed()->nop();
+		__ stop("object size is not multiple of 2 - adjust this code");
+		__ bind(L);
+		// T4 must be > 0, no extra check needed here
+#endif
+
+		__ shr(T4, 1);
+		// initialize remaining object fields: T4 is a multiple of 2
+		{ 
+			Label loop;
+			__ sll(T1, T4, 3);
+			__ add(T1, FSR, T1);
+			__ addi(T1, T1, -8);
+
+			__ bind(loop);
+			__ sw(ZERO, T1, sizeof(oopDesc) + 0*oopSize);
+			__ sw(ZERO, T1, sizeof(oopDesc) + 1*oopSize);
+			__ bne(T1, FSR, loop); // don't clear the header
+			__ delayed()->addi(T1, T1, -8);
+			// since sizeof(oopDesc) == 8, we can move the addi(T1, T1, -8)
+			// into the delay slot and compare FSR with T1
+		}
+                //klass in T3, 
+		// initialize object header only.
+		__ bind(initialize_header);
+		if (UseBiasedLocking) {
+			// __ popl(ecx);   // get saved klass back in the register.
+			// __ movl(ebx, Address(ecx, Klass::prototype_header_offset_in_bytes() 
+			// + klassOopDesc::klass_part_offset_in_bytes()));
+			__ lw(AT,T3, Klass::prototype_header_offset_in_bytes() 
+					+ klassOopDesc::klass_part_offset_in_bytes()); 
+			// __ movl(Address(eax, oopDesc::mark_offset_in_bytes ()), ebx);
+			__ sw(AT, FSR, oopDesc::mark_offset_in_bytes ());    
+		} else {
+			__ move(AT, (int)markOopDesc::prototype());
+			__ sw(AT, FSR, oopDesc::mark_offset_in_bytes());
+		}
+
+		__ sw(T3, FSR, oopDesc::klass_offset_in_bytes());
+
+		{
+			SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
+			// Trigger dtrace event for fastpath
+			__ push(atos);
+			__ call_VM_leaf(
+				CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
+			__ pop(atos);
+		}
+		__ b(done);
+		__ delayed()->nop();
+	}	
+	// slow case
+	__ bind(slow_case);
+	// call_VM(result, InterpreterRuntime::_new, cpool, index)
+	call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
+
+	// continue
+	__ bind(done);
+}
+
+void TemplateTable::newarray() {
+	transition(itos, atos);
+	__ lbu(A1, at_bcp(1));
+	//type, count
+	call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
+}
+
+void TemplateTable::anewarray() {
+	transition(itos, atos);
+	__ load_two_bytes_from_at_bcp(A2, AT, 1);
+	__ huswap(A2);
+	__ get_constant_pool(A1);
+	// cp, index, count
+	call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
+}
+
+void TemplateTable::arraylength() {
+	transition(atos, itos);
+	__ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
+	__ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
+}
+
+// I use T2 as ebx, T3 as ecx, T4 as edx (following the x86 version)
+// when invoking gen_subtype_check: superklass in T4, subklass in T2, object always in FSR
+// T2 : sub klass
+// T3 : cpool
+// T4 : super klass
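+//
+// In outline (a sketch of what the code below implements):
+//   if (obj != NULL) {
+//     klass = the class resolved from the constant pool
+//             (quickened via InterpreterRuntime::quicken_io_cc if necessary);
+//     if (obj's klass is not a subtype of klass) throw ClassCastException;
+//   }
+//   obj (possibly NULL) stays on the expression stack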
+void TemplateTable::checkcast() {
+	transition(atos, atos);
+	Label done, is_null, ok_is_subtype, quicked, resolved;
+	__ beq(FSR, ZERO, is_null);
+	__ delayed()->nop();
+
+	__ profile_checkcast(false, T3);
+
+	// Get cpool & tags index
+	__ get_cpool_and_tags(T3, T4);
+	__ load_two_bytes_from_at_bcp(T2, AT, 1);
+	__ huswap(T2);
+
+	// See if bytecode has already been quicked
+	__ add(AT, T4, T2);
+	__ lb(AT, AT, typeArrayOopDesc::header_size(T_BYTE) * wordSize);
+	__ addiu(AT, AT, - (int)JVM_CONSTANT_Class);
+	__ beq(AT, ZERO, quicked);
+	__ delayed()->nop();
+
+	__ move(TSR, FSR);	// call_VM blows FSR
+	call_VM(T4, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+	__ b(resolved);
+	__ delayed();	__ move(FSR, TSR);
+
+	// klass already in cp, get superklass in T4
+	__ bind(quicked);
+	__ sll(AT, T2, 2);
+	__ add(AT, T3, AT);
+	__ lw(T4, AT, sizeof(constantPoolOopDesc));
+
+	__ bind(resolved);
+
+	// get subklass in T2
+	__ lw(T2, FSR, oopDesc::klass_offset_in_bytes());
+/*	__ move(AT, (int)&jerome1 );
+	__ sw(T2, AT, 0); 	
+	__ move(AT, (int)&jerome2 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome3 );
+	__ sw(ZERO, AT, 0); 	
+	__ move(AT, (int)&jerome4 );
+	__ sw(ZERO, AT, 0); 	
+
+	__ move(AT, (int)&jerome5 );
+	__ sw(ZERO, AT, 0); 	
+
+	__ move(AT, (int)&jerome6 );
+	__ sw(ZERO, AT, 0); 	
+
+	__ move(AT, (int)&jerome7 );
+	__ sw(ZERO, AT, 0); 	
+
+	__ move(AT, (int)&jerome8 );
+	__ sw(ZERO, AT, 0); 	
+	
+	__ move(AT, (int)&jerome9 );
+	__ sw(ZERO, AT, 0);
+	__ move(AT, (int)&jerome10 );
+	__ sw(ZERO, AT, 0);
+
+
+	__ pushad();
+//	__ enter();
+	__ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_call_statistics), 
+				relocInfo::runtime_call_type);
+	__ delayed()->nop();
+//	__ leave();
+	__ popad();
+*/
+	// Superklass in T4.  Subklass in T2.
+	__ gen_subtype_check(T4, T2, ok_is_subtype);
+
+	// Come here on failure
+	// object is at FSR
+	__ jmp(Interpreter::_throw_ClassCastException_entry);
+	__ delayed()->nop();
+
+	// Come here on success
+	__ bind(ok_is_subtype);
+
+	// Collect counts on whether this check-cast sees NULLs a lot or not.
+	if (ProfileInterpreter) {
+		__ b(done);
+		__ delayed()->nop();
+	}
+	__ bind(is_null);
+	__ profile_checkcast(true, T3);
+	__ bind(done);
+}
+
+// I use T3 as cpool, T4 as tags, T2 as index
+// object always in FSR, superklass in T4, subklass in T2
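+//
+// In outline: FSR = (obj != NULL && obj's klass is a subtype of the resolved
+// class) ? 1 : 0, with the class quickened from the constant pool if needed.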
+void TemplateTable::instanceof() {
+	transition(atos, itos);
+	Label done, ok_is_subtype, quicked, resolved;
+
+	__ beq(FSR, ZERO, done);
+	__ delayed()->nop();
+
+	// Get cpool & tags index
+	__ get_cpool_and_tags(T3, T4);
+	// get index
+	__ load_two_bytes_from_at_bcp(T2, AT, 1);
+	__ hswap(T2);
+
+	// See if bytecode has already been quicked
+	// quicked
+	__ addu(AT, T4, T2);
+	__ lb(AT, AT, typeArrayOopDesc::header_size(T_BYTE) * wordSize);
+	__ addiu(AT, AT, - (int)JVM_CONSTANT_Class);
+	__ beq(AT, ZERO, quicked);
+	__ delayed()->nop();
+
+	// get superklass in T4
+	//__ move(TSR, FSR);
+	// sometimes S2 may be changed during the call,
+	// be careful if you use TSR as a saving place
+	//__ push(FSR);
+	__ push(atos);
+	call_VM(T4, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+	//__ lw(FSR, SP, 0);
+	__ pop_ptr(FSR);	
+	__ b(resolved);
+	__ delayed()->nop();
+	//__ move(FSR, TSR);
+
+	// get superklass in T4, subklass in T2
+	__ bind(quicked);
+	__ sll(AT, T2, 2);
+	__ addu(AT, T3, AT);
+	__ lw(T4, AT, sizeof(constantPoolOopDesc)); 
+
+	__ bind(resolved);
+	// get subklass in T2
+	__ lw(T2, FSR, oopDesc::klass_offset_in_bytes());
+
+	// Superklass in T4.  Subklass in T2.
+	__ gen_subtype_check(T4, T2, ok_is_subtype);
+	// Come here on failure
+	__ b(done);
+	__ delayed(); __ move(FSR, ZERO);
+
+	// Come here on success
+	__ bind(ok_is_subtype);
+	__ move(FSR, 1);
+
+	__ bind(done);
+	// FSR = 0: obj == NULL or  obj is not an instanceof the specified klass
+	// FSR = 1: obj != NULL and obj is     an instanceof the specified klass
+}
+
+//--------------------------------------------------------
+//--------------------------------------------
+// Breakpoints
+void TemplateTable::_breakpoint() {
+
+	// Note: We get here even if we are single stepping.
+	// jbug insists on setting breakpoints at every bytecode
+	// even if we are in single step mode.
+
+	transition(vtos, vtos);
+
+	// get the unpatched byte code
+	///__ get_method(ecx);
+	///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at)
+	//, ecx, esi);
+	///__ movl(ebx, eax);
+	__ get_method(A1);
+	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), 
+			A1, BCP);
+	__ move(T2, V0);
+
+	// post the breakpoint event
+	///__ get_method(ecx);
+	///__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ecx, esi);
+	__ get_method(A1);
+	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
+
+	// complete the execution of original bytecode
+	__ dispatch_only_normal(vtos);
+} 
+
+//----------------------------------------------------------------------------------------------------
+// Exceptions
+
+void TemplateTable::athrow() {
+	transition(atos, vtos);
+	__ null_check(FSR);
+	__ jmp(Interpreter::throw_exception_entry());
+	__ delayed()->nop();
+}
+
+//----------------------------------------------------------------------------------------------------
+// Synchronization
+//
+// Note: monitorenter & exit are symmetric routines, which is reflected
+//       in the assembly code structure as well
+//
+// Stack layout:
+//
+// [expressions  ] <--- SP               = expression stack top
+// ..
+// [expressions  ]
+// [monitor entry] <--- monitor block top = expression stack bot
+// ..
+// [monitor entry]
+// [frame data   ] <--- monitor block bot
+// ...
+// [return addr  ] <--- FP
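+//
+// In outline, monitorenter below does (a sketch):
+//   free = NULL;
+//   for (entry = monitor block top; entry != monitor block bot; entry++) {
+//     if (entry->obj == NULL) free = entry;   // remember a free slot
+//     if (entry->obj == obj)  break;          // stop scanning
+//   }
+//   if (free == NULL) grow the monitor block by one entry,
+//                     shifting the expression stack down, and use the new slot
+//   free->obj = obj;  lock_object(free);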
+
+// we use T2 as monitor entry pointer, T3 as monitor top pointer, T6 as free slot pointer
+// object always in FSR
+void TemplateTable::monitorenter() {
+	transition(atos, vtos);
+	// check for NULL object
+	__ null_check(FSR);
+
+	const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset 
+			* wordSize);
+	const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
+	Label allocated;
+
+	// initialize entry pointer
+	__ move(T6, ZERO);
+
+	// find a free slot in the monitor block (result in T6)
+	{ 
+		Label entry, loop, exit, next;
+		__ lw(T2, monitor_block_top);
+		__ b(entry);
+		__ delayed()->addi(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
+
+		// free slot?
+		__ bind(loop);
+		__ lw(AT, T2, BasicObjectLock::obj_offset_in_bytes());
+		__ bne(AT, ZERO, next);
+		__ delayed()->nop();
+		__ move(T6, T2);
+
+		__ bind(next);
+		__ beq(FSR, AT, exit);
+		__ delayed()->nop();
+		__ addi(T2, T2, entry_size);
+
+		__ bind(entry);
+		__ bne(T3, T2, loop);
+		__ delayed()->nop();
+		__ bind(exit);
+	}
+
+	__ bne(T6, ZERO, allocated);
+	__ delayed()->nop();
+
+	// allocate one if there's no free slot
+	{ 
+		Label entry, loop;
+		// 1. compute new pointers                   // SP: old expression stack top
+		__ lw(T6, monitor_block_top);
+		__ addi(SP, SP, - entry_size);
+		__ addi(T6, T6, - entry_size);
+		__ sw(T6, monitor_block_top);
+		__ b(entry);
+		__ delayed();
+		__ move(T3, SP);
+
+		// 2. move expression stack contents
+		__ bind(loop);
+		__ lw(AT, T3, entry_size);
+		__ sw(AT, T3, 0);
+		__ addi(T3, T3, wordSize); 
+		__ bind(entry);
+		__ bne(T3, T6, loop);
+		__ delayed()->nop();
+	}
+
+	__ bind(allocated);
+	// Increment bcp to point to the next bytecode,
+	// so exception handling for async. exceptions works correctly.
+	// The object has already been popped from the stack, so the
+	// expression stack looks correct.
+	__ addi(BCP, BCP, 1); 
+	__ sw(FSR, T6, BasicObjectLock::obj_offset_in_bytes());
+	__ lock_object(T6);
+	// check to make sure this monitor doesn't cause stack overflow after locking
+	__ save_bcp();  // in case of exception
+	__ generate_stack_overflow_check(0);
+	// The bcp has already been incremented. Just need to dispatch to next instruction.
+
+	__ dispatch_next(vtos);
+}
+
+// T2 : top
+// T6 : entry
+void TemplateTable::monitorexit() {
+	transition(atos, vtos);
+
+	__ null_check(FSR);
+
+	const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize);
+	Label found;
+
+	// find matching slot
+	{ 
+		Label entry, loop;
+		__ lw(T6, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+		__ b(entry);
+		__ delayed()->addiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
+
+		__ bind(loop);
+		__ lw(AT, T6, BasicObjectLock::obj_offset_in_bytes());
+		__ beq(FSR, AT, found);
+		__ delayed()->nop();
+		__ addiu(T6, T6, entry_size);
+		__ bind(entry);
+		__ bne(T2, T6, loop);
+		__ delayed()->nop();
+	}
+
+	// error handling. Unlocking was not block-structured
+	Label end;
+	__ call_VM(NOREG, CAST_FROM_FN_PTR(address, 
+				InterpreterRuntime::throw_illegal_monitor_state_exception));
+	__ should_not_reach_here();
+
+	// call run-time routine
+	// T6: points to monitor entry
+	__ bind(found);
+	__ move(TSR, FSR);
+	__ unlock_object(T6);
+	__ move(FSR, TSR);
+	__ bind(end);
+}
+
+//--------------------------------------------------------------------------------------------------
+// Wide instructions
+
+void TemplateTable::wide() {
+	transition(vtos, vtos);
+	// Note: the BCP increment step is part of the individual wide bytecode implementations
+	__ lbu(T7, at_bcp(1));
+	__ sll(AT, T7, 2);
+	__ lui(T9, Assembler::split_high(int(Interpreter::_wentry_point)));
+	__ add(T9, T9, AT);
+	__ lw(T9, T9, Assembler::split_low(int(Interpreter::_wentry_point)));
+	__ jr(T9);
+	__ delayed()->nop();
+}
+
+//--------------------------------------------------------------------------------------------------
+// Multi arrays
+
+void TemplateTable::multianewarray() {
+	transition(vtos, atos);
+	// last dim is on top of stack; we want address of first one:
+	// first_addr = last_addr + (ndims - 1) * wordSize
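+	// e.g. with ndims == 3 and 4-byte stack slots: A1 = SP + 3*4 - 4 = SP + 8,
+	// the slot holding the first (outermost) dimension count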
+	__ lbu(A1, at_bcp(3));	// dimension
+	//	__ sll(A1, A1, 2);
+	__ sll(A1, A1, Interpreter::stackElementScale());
+	__ addi(A1, A1, -4);	
+	__ add(A1, SP, A1);		// now A1 pointer to the count array on the stack
+	call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1);
+	__ lbu(AT, at_bcp(3));
+	//	__ sll(AT, AT, 2);
+	__ sll(AT, AT, Interpreter::stackElementScale());
+	__ add(SP, SP, AT);
+}
+
+#endif // !CC_INTERP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+  static void prepare_invoke(Register method, Register index, int byte_no,
+                             Bytecodes::Code code);
+  static void invokevirtual_helper(Register index, Register recv,
+                                   Register flags);
+  //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
+  static void volatile_barrier();
+
+  // Helpers
+  static void index_check(Register array, Register index);
+  static void index_check_without_pop(Register array, Register index);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry)            \
+                                                                                                                                     \
+  /******************************/                                                                                                   \
+  /* JavaCallWrapper            */                                                                                                   \
+  /******************************/                                                                                                   \
+  /******************************/                                                                                                   \
+  /* JavaFrameAnchor            */                                                                                                   \
+  /******************************/                                                                                                   \
+  volatile_nonstatic_field(JavaFrameAnchor,     _last_Java_fp,                                    intptr_t*)                              \
+                                                                                                                                     \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used  */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must  */
+  /* be present there)                                                */
+
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry)                               \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used  */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must    */
+  /* be present there)                                                */
+
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used        */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must  */
+  /* be present there)                                                      */
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry)                                                              \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used         */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must  */
+  /* be present there)                                                       */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_vm_version_mips.cpp.incl"
+/*
+int VM_Version::_cpu;
+int VM_Version::_model;
+int VM_Version::_stepping;
+int VM_Version::_cpuFeatures;
+const char*           VM_Version::_features_str = "";
+VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
+
+static BufferBlob* stub_blob;
+static const int stub_size = 300;
+
+extern "C" {
+  typedef void (*getPsrInfo_stub_t)(void*);
+}
+static getPsrInfo_stub_t getPsrInfo_stub = NULL;
+*/
+int VM_Version::_features = VM_Version::unknown_m;
+const char* VM_Version::_features_str = "";
+/*
+class VM_Version_StubGenerator: public StubCodeGenerator {
+ public:
+
+  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
+
+  address generate_getPsrInfo() {
+  };
+};
+
+
+void VM_Version::get_processor_features() {
+}
+*/
+void VM_Version::initialize() {
+	_features = determine_features();
+	//no need, Abstract_VM_Version already defines it as false
+	//_supports_cx8 = false;
+
+	char buf[256];
+	jio_snprintf(buf, sizeof(buf), "%s, %s"
+#ifdef OPT_RANGECHECK
+			", optimized range check"
+#endif
+#ifdef OPT_PHI_1
+			", optimized phi"
+#endif
+#ifdef OPT_MERGE
+			", optimized merge"
+#endif
+			,	(has_l2_cache() ? "has_l2_cache" : ""), (has_16k_page() ? "has_16k_page" : "")
+	);
+	//////////////////////add some other feature here//////////////////
+	
+	// buf may start with ", " when the first feature string is empty
+	_features_str = strdup(buf);
+	NOT_PRODUCT( if (PrintMiscellaneous && Verbose) print_features(); );
+}
+
+void VM_Version::print_features() {
+	tty->print_cr("Version:%s", cpu_features());
+}
+
+int VM_Version::determine_features() {
+	//////////////////////add some other feature here//////////////////
+	return spt_16k_page_m; 
+}
+
+static int saved_features = 0;
+
+void VM_Version::allow_all() {
+	saved_features = _features;
+	_features     = all_features_m;
+}
+
+void VM_Version::revert() {
+	_features = saved_features;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+
+class VM_Version: public Abstract_VM_Version {
+protected:
+	 enum Feature_Flag {
+		 with_l2_cache = 0,
+		 spt_16k_page = 1,
+		 //////////////////////add some other feature here//////////////////
+	 };
+
+	 enum Feature_Flag_Set {
+		 unknown_m	  = 0,
+		 all_features_m	  = -1,
+		 with_l2_cache_m  = 1 << with_l2_cache,
+		 spt_16k_page_m   = 1 << spt_16k_page,
+
+		 //////////////////////add some other feature here//////////////////
+	 };
+		 
+	static int  _features;
+	static const char* _features_str;
+
+	static void print_features();
+	static int  determine_features();
+
+public:
+	// Initialization
+	static void initialize();
+	
+	// MIPS has no such instruction; ll/sc is used instead
+	static bool supports_compare_and_exchange() { return false; }
+
+	static bool has_l2_cache() { return _features & with_l2_cache_m; }
+	static bool has_16k_page() { return _features & spt_16k_page_m; }
+		
+	//////////////////////add some other feature here//////////////////
+
+	static const char* cpu_features() { return _features_str; }
+	  
+	// Assembler testing
+	static void allow_all();
+	static void revert();		
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2006-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_vmreg_mips.cpp.incl"
+
+
+
+void VMRegImpl::set_regName() {
+  Register reg = ::as_Register(0);
+  int i;
+  for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) {
+    regName[i++] = reg->name();
+    regName[i++] = reg->name();
+    reg = reg->successor();
+  }
+
+  FloatRegister freg = ::as_FloatRegister(0);
+  for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
+    regName[i++] = freg->name();
+    if (freg->encoding() > 31) {
+      regName[i++] = freg->name();
+    }
+    freg = freg->successor();
+  }
+
+  for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) {
+    //regName[i] = "NON-GPR-FPR-XMM";
+    regName[i] = "NON-GPR-FPR";
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+  bool is_Register();
+  Register as_Register();
+
+  bool is_FloatRegister();
+  FloatRegister as_FloatRegister();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2006-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+inline VMReg RegisterImpl::as_VMReg() {
+  if( this==noreg ) return VMRegImpl::Bad();
+	// each GPR occupies two VMReg slots (see VMRegImpl::set_regName), hence encoding() << 1
+  return VMRegImpl::as_VMReg(encoding() << 1 );
+}
+
+inline VMReg FloatRegisterImpl::as_VMReg() {
+  //return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr);
+	return VMRegImpl::as_VMReg( ConcreteRegisterImpl::max_gpr + encoding() );
+}
+
+inline bool VMRegImpl::is_Register() { return value() >= 0 && value() < ConcreteRegisterImpl::max_gpr; }
+inline bool VMRegImpl::is_FloatRegister() { return value() >= ConcreteRegisterImpl::max_gpr && 
+                                                   value() < ConcreteRegisterImpl::max_fpr; }
+inline Register VMRegImpl::as_Register() {
+
+  assert( is_Register(), "must be");
+  // Yuk
+  return ::as_Register(value() >> 1);
+}
+
+inline FloatRegister VMRegImpl::as_FloatRegister() {
+  //assert( is_FloatRegister() && is_even(value()), "must be" );
+  assert( is_FloatRegister(), "must be" );
+  // Yuk
+  return ::as_FloatRegister( value() - ConcreteRegisterImpl::max_gpr );
+}
+
+inline   bool VMRegImpl::is_concrete() {
+  assert(is_reg(), "must be");
+  int v = value();
+  if ( v  <  ConcreteRegisterImpl::max_gpr ) {
+    return is_even(v);
+  }
+  // F0..F31
+  if ( v <= ConcreteRegisterImpl::max_gpr + 31) return true;
+  if ( v <  ConcreteRegisterImpl::max_fpr) {
+    return is_even(v);
+  }
+  assert(false, "what register?");
+  return false;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/mips/vm/vtableStubs_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_vtableStubs_mips.cpp.incl"
+
+// machine-dependent part of VtableStubs: create VtableStub of correct size and
+// initialize its code
+
+#define __ masm->
+
+#ifndef PRODUCT
+extern "C" void bad_compiled_vtable_index(JavaThread* thread,
+                                          oop receiver,
+                                          int index);
+#endif
+
+// used by compiler only; receiver in T0.
+// used registers:
+// T7 : receiver klass & method
+// NOTE: If this code is used by the C1, the receiver_location is always 0.
+// when we reach here, the receiver is in T0, the klass in T8
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
+	const int gs2_code_length = VtableStub::pd_code_size_limit(true);
+	//VtableStub* s = new(gs2_code_length) VtableStub(true, vtable_index, receiver_location);
+	//FIXME aoqi
+	VtableStub* s = new(gs2_code_length) VtableStub(true, vtable_index);
+	ResourceMark rm;
+        CodeBuffer cb(s->entry_point(), gs2_code_length);
+	MacroAssembler* masm = new MacroAssembler(&cb);
+	Register t1 = T8, t2 = T7;
+#ifndef PRODUCT
+//#ifdef COMPILER2
+	if (CountCompiledCalls) {
+		__ move(AT, (int)SharedRuntime::nof_megamorphic_calls_addr());
+	        __ lw(t1, AT , 0);	
+		__ addiu(t1, t1, 1);
+		__ sw(t1, AT,0);
+	}
+//#endif
+#endif
+
+     // get receiver (need to skip return address on top of stack)
+     //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0");
+	
+     // get receiver klass
+	address npe_addr = __ pc();
+	__ lw(t1, T0, oopDesc::klass_offset_in_bytes());
+	// compute entry offset (in words)
+	int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
+#ifndef PRODUCT
+	if (DebugVtables) { 
+		Label L;
+		// check offset vs vtable length
+		__ lw(t2, t1, instanceKlass::vtable_length_offset()*wordSize);
+		assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code");
+		__ move(AT, vtable_index*vtableEntry::size());
+		__ slt(AT, AT, t2);
+		__ bne(AT, ZERO, L);
+		__ delayed()->nop();
+		__ move(A2, vtable_index);
+		__ move(A1, A0);
+		__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2);
+		__ bind(L);
+	}
+#endif // PRODUCT
+	// load methodOop and target address
+	const Register method = T7;
+	// __ movl(method, Address(eax, entry_offset*wordSize + vtableEntry::method_offset_in_bytes()));
+	__ lw(method, t1,  entry_offset*wordSize + vtableEntry::method_offset_in_bytes()); 
+	if (DebugVtables) { 
+	      Label L;
+	      __ beq(method, ZERO, L); 
+	      __ delayed()->nop();
+	      // __ cmpl(Address(method, methodOopDesc::from_compiled_offset()), NULL_WORD);
+	      __ lw(AT, method,in_bytes(methodOopDesc::from_compiled_offset())); 
+	      //__ jcc(Assembler::notZero, L);
+	      __ bne(AT, ZERO, L);
+              __ delayed()->nop();	
+	      __ stop("Vtable entry is NULL");
+	      __ bind(L);
+      }
+	// T8: receiver klass
+	// T0: receiver
+	// T7: methodOop
+	// T9: entry
+	address ame_addr = __ pc();
+         __ lw(T9, method,in_bytes(methodOopDesc::from_compiled_offset()));	
+         __ jr(T9);	
+         __ delayed()->nop();	
+	masm->flush();
+	s->set_exception_points(npe_addr, ame_addr);
+	return s;
+}
+
+
+// I am not sure which register contains the Interface; for now I just assume A1. FIXME
+// used registers:
+//	T1 T7
+// when we reach here, the receiver is in T0, the klass in T1
+VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+  // Note well: pd_code_size_limit is the absolute minimum we can get
+  // away with.  If you add code here, bump the code stub size
+  // returned by pd_code_size_limit!
+   const int gs2_code_length = VtableStub::pd_code_size_limit(false);
+   VtableStub* s = new(gs2_code_length) VtableStub(false, vtable_index);
+   ResourceMark rm;
+   CodeBuffer cb(s->entry_point(), gs2_code_length);
+   MacroAssembler* masm = new MacroAssembler(&cb);
+// we use T8 and T9 as temporary registers; they are free from the register allocator
+     Register t1 = T8, t2 = T7;
+	// Entry arguments:
+	//  T1: Interface
+	//  T0: Receiver
+#ifndef PRODUCT
+  if (CountCompiledCalls) {
+	  //__ incl(Address((int)SharedRuntime::nof_megamorphic_calls_addr(), relocInfo::none));
+            __ move(AT, (int)SharedRuntime::nof_megamorphic_calls_addr());
+            __ lw(T8, AT, 0);
+	    __ addi(T8, T8,1); 
+            __ sw(T8, AT, 0);
+  }
+#endif /* PRODUCT */
+	//assert(receiver_location == T0->as_VMReg(), "receiver expected in T0");
+	// get receiver klass (also an implicit null-check)
+	address npe_addr = __ pc();
+	__ lw(t1, T0, oopDesc::klass_offset_in_bytes());
+	// compute itable entry offset (in words)  
+	const int base = instanceKlass::vtable_start_offset() * wordSize;    
+	assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
+	assert(Assembler::is_simm16(base), "change this code");
+	__ addi(t2, t1, base);
+	assert(Assembler::is_simm16(instanceKlass::vtable_length_offset() * wordSize), "change this code");
+	__ lw(AT, t1, instanceKlass::vtable_length_offset() * wordSize);
+	__ shl(AT, 2);
+	__ add(t2, t2, AT);
+	if (HeapWordsPerLong > 1) {
+		__ round_to(t2, BytesPerLong);
+	}
+
+	Label hit, entry;
+   	assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code");
+	__ bind(entry);
+
+#ifdef ASSERT
+	// Check that the entry is non-null
+	if (DebugVtables) { 
+		Label L;
+		assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code");
+		__ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes());
+		__ bne(AT, ZERO, L);
+		__ delayed()->nop();
+		__ stop("null entry point found in itable's offset table");
+		__ bind(L);
+	}
+#endif
+	assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code");
+	__ lw(AT, t2, itableOffsetEntry::interface_offset_in_bytes());
+	__ bne(AT, T1, entry);
+	__ delayed()->addi(t2, t2, itableOffsetEntry::size() * wordSize);
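+	// The loop above scans the itableOffsetEntry records until the interface in T1
+	// is found.  The addi in the delay slot has already advanced t2 past the matching
+	// entry, hence the -itableOffsetEntry::size() * wordSize adjustment below.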
+
+	// We found a hit, move offset into T9
+	__ lw(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize);
+
+	// Compute itableMethodEntry.  
+	const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + 
+		itableMethodEntry::method_offset_in_bytes();
+
+  // Get methodOop and entrypoint for compiler    
+//  const Register method = ebx;
+      const Register method = T7;
+//   __ movl(method, Address(esi, edx, Address::times_1, method_offset));  
+      __ sll(AT, t2, Address::times_1); 
+      __ add(AT, AT, t1 );  
+      __ lw(method, AT,  method_offset);  
+
+
+	
+#ifdef ASSERT
+	if (DebugVtables) {
+		Label L1;
+	//      __ cmpl(method, NULL_WORD);
+ //     __ jcc(Assembler::equal, L1);
+        __ beq(method, ZERO, L1); 
+        __ delayed()->nop();		
+//	__ cmpl(Address(method, methodOopDesc::from_compiled_offset()), NULL_WORD);
+        __ lw(AT, method,in_bytes(methodOopDesc::from_compiled_offset())); 
+//	__ jcc(Assembler::notZero, L1);
+        __ bne(AT, ZERO, L1); 
+        __ delayed()->nop();
+	__ stop("methodOop is null");
+	__ bind(L1);
+	}
+#endif // ASSERT
+/*
+	// T7: methodOop
+	// T0: receiver
+	// T9: entry point      
+	__ jmp(T9);
+#endif // COMPILER2
+*/
+        address ame_addr = __ pc();
+	//__ jmp(Address(method, methodOopDesc::from_compiled_offset()));
+        __ lw(T9, method,in_bytes(methodOopDesc::from_compiled_offset()));
+        __ jr(T9);
+        __ delayed()->nop();	
+    masm->flush();
+	s->set_exception_points(npe_addr, ame_addr);
+	return s;
+}
+
+// NOTE: whenever you change the code above, don't forget to change the constants here
+int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
+	if (is_vtable_stub) {
+            return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0);
+	} else {
+            return  ( DebugVtables ? 636 : 72) + (CountCompiledCalls ? 24 : 0);
+	}
+}
+
+int VtableStub::pd_code_alignment() {
+  return wordSize;
+}
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -4474,7 +4474,7 @@
 void MacroAssembler::cmpoop(Register src1, jobject obj) {
   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 }
-
+/*
 void MacroAssembler::extend_sign(Register hi, Register lo) {
   // According to Intel Doc. AP-526, "Integer Divide", p.18.
   if (VM_Version::is_P6() && hi == rdx && lo == rax) {
@@ -4484,7 +4484,7 @@
     sarl(hi, 31);
   }
 }
-
+*/
 void MacroAssembler::fat_nop() {
   // A 5 byte nop that is safe for patching (see patch_verified_entry)
   emit_byte(0x26); // es:
@@ -6183,7 +6183,7 @@
     Assembler::ldmxcsr(Address(rscratch1, 0));
   }
 }
-
+/*
 int MacroAssembler::load_signed_byte(Register dst, Address src) {
   int off;
   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
@@ -6243,7 +6243,7 @@
   }
   return off;
 }
-
+*/
 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
   if (reachable(dst)) {
     movl(as_Address(dst), src);
--- a/hotspot/src/os/linux/launcher/java_md.c	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/os/linux/launcher/java_md.c	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -79,6 +80,8 @@
 #    define ARCH "i386"
 #  elif defined(__sparc)
 #    define ARCH "sparc"
+#  elif defined(MIPS32)
+#    define ARCH "mipsel"
 #  endif
 
 #endif /* _LP64 */
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -161,7 +162,7 @@
 
 
 #ifndef SYS_gettid
-// i386: 224, ia64: 1105, amd64: 186, sparc 143
+// i386: 224, ia64: 1105, amd64: 186, sparc 143, mips 4222
 #ifdef __ia64__
 #define SYS_gettid 1105
 #elif __i386__
@@ -170,6 +171,8 @@
 #define SYS_gettid 186
 #elif __sparc__
 #define SYS_gettid 143
+#elif __mips__
+#define SYS_gettid 4222
 #else
 #error define gettid for the arch
 #endif
@@ -188,6 +191,8 @@
 #  else
 static char cpu_arch[] = "sparc";
 #  endif
+#elif defined(MIPS32)
+static char cpu_arch[] = "mipsel";  //used to locate .so in /usr/lib/jvm/open..../lib/cpu_arch/...
 #else
 #error Add appropriate cpu_arch setting
 #endif
@@ -1738,6 +1743,10 @@
     {EM_SPARC,       EM_SPARC,   ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
     {EM_SPARC32PLUS, EM_SPARC,   ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
     {EM_SPARCV9,     EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"},
+    {EM_MIPS,        EM_MIPS,    ELFCLASS64, ELFDATA2LSB, (char*)"MIPS64 LE"},
+                // YYY would've guessed EM_MIPS_RS3_LE
+                // from reading <elf.h> but dumpelf
+                // libjvm.so (e.g.) says EM_MIPS
     {EM_PPC,         EM_PPC,     ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"},
     {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"}
   };
@@ -1752,13 +1761,15 @@
     static  Elf32_Half running_arch_code=EM_SPARCV9;
   #elif  (defined __sparc) && (!defined _LP64)
     static  Elf32_Half running_arch_code=EM_SPARC;
+  #elif  (defined MIPS32)
+    static  Elf32_Half running_arch_code=EM_MIPS;
   #elif  (defined __powerpc64__)
     static  Elf32_Half running_arch_code=EM_PPC64;
   #elif  (defined __powerpc__)
     static  Elf32_Half running_arch_code=EM_PPC;
   #else
     #error Method os::dll_load requires that one of following is defined:\
-         IA32, AMD64, IA64, __sparc, __powerpc__
+         IA32, AMD64, IA64, __sparc, __mips64, __powerpc__
   #endif
 
   // Identify compatability class for VM's architecture and library's architecture
@@ -2566,7 +2577,16 @@
     // format has been changed), we'll use the largest page size supported by
     // the processor.
 
-    _large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M);
+    //_large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M);
+    _large_page_size =
+      IA32_ONLY(4 * M)
+      AMD64_ONLY(2 * M)
+      IA64_ONLY(256 * M)
+      SPARC_ONLY(4 * M)
+      MIPS_ONLY(4 * M)      // YYY guess - doesn't seem there are
+                            // large pages at all - prohibit this
+                            // option altogether?
+      ;
 
     FILE *fp = fopen("/proc/meminfo", "r");
     if (fp) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,107 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_assembler_linux_mips.cpp.incl"
+
+#ifndef _LP64
+void MacroAssembler::int3() {
+  //call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type);
+//	move(T9, (int)CAST_FROM_FN_PTR(address, os::breakpoint));
+  int imm = (int)CAST_FROM_FN_PTR(address, os::breakpoint);
+	if (is_simm16(imm)) {
+		addiu(T9, ZERO, imm);
+	} else { 
+		lui(T9, split_high(imm));
+		if (split_low(imm))
+			addiu(T9, T9, split_low(imm)); 
+	}
+
+	jalr();	
+}
+
+void MacroAssembler::get_thread(Register thread) {
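+  // The current thread is looked up from the stack pointer: SP >> PAGE_SHIFT indexes
+  // a word table whose address is formed from ThreadLocalStorage::sp_map_high()/sp_map_low(),
+  // mapping each stack page to its Thread*.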
+  move(thread, SP);
+  shr(thread, PAGE_SHIFT);
+	shl(thread, 2);
+	lui(AT, ThreadLocalStorage::sp_map_high());
+	add(AT, AT, thread);
+	lw(thread, AT, ThreadLocalStorage::sp_map_low());
+	/*addi(SP, SP, -40);
+	sw(T0, SP, 0 * wordSize);
+	sw(T1, SP, 1 * wordSize);
+	sw(T2, SP, 2 * wordSize);
+	sw(T3, SP, 0 * wordSize);
+	sw(T4, SP, 0 * wordSize);
+	sw(T5, SP, 0 * wordSize);
+	sw(T6, SP, 0 * wordSize);
+	sw(T7, SP,  * wordSize);
+	sw(T8, SP, 8 * wordSize);
+	sw(T9, SP, 9 * wordSize);
+	call_VM(thread, CAST_FROM_FN_PTR(address, ThreadLocalStorage::get_thread_slow));
+	addi(SP, SP, 40);*/
+}
+#else
+void MacroAssembler::int3() {
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
+}
+
+void MacroAssembler::get_thread(Register thread) {
+  // call pthread_getspecific
+  // void * pthread_getspecific(pthread_key_t key);
+   if (thread != rax) {
+     push(rax);
+   }
+   push(rdi);
+   push(rsi);
+   push(rdx);
+   push(rcx);
+   push(r8);
+   push(r9);
+   push(r10);
+   // XXX
+   mov(r10, rsp);
+   andq(rsp, -16);
+   push(r10);
+   push(r11);
+
+   movl(rdi, ThreadLocalStorage::thread_index());
+   call(RuntimeAddress(CAST_FROM_FN_PTR(address, pthread_getspecific)));
+
+   pop(r11);
+   pop(rsp);
+   pop(r10);
+   pop(r9);
+   pop(r8);
+   pop(rcx);
+   pop(rdx);
+   pop(rsi);
+   pop(rdi);
+   if (thread != rax) {
+       mov(thread, rax);
+       pop(rax);
+   }
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,198 @@
+/*
+ * Copyright 1999-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Implementation of class Atomic
+
+///////////implementation of Atomic::store*//////////////
+inline void Atomic::store     (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
+inline void Atomic::store     (jshort   store_value, jshort*   dest) { *dest = store_value;}
+inline void Atomic::store     (jint	    store_value, jint*     dest) { *dest = store_value; }
+//no need to consider the unaligned double word load
+inline void Atomic::store     (jlong    store_value, jlong*    dest) { *dest = store_value; }
+inline void Atomic::store_ptr	(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr (void*    store_value, void*	   dest) { *(void**)dest = store_value; }
+
+inline void Atomic::store     (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
+inline void Atomic::store     (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
+inline void Atomic::store     (jint     store_value, volatile jint*     dest) { *dest = store_value; }
+//no need to consider the unaligned double word load
+inline void Atomic::store     (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
+inline void Atomic::store_ptr (intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
+inline void Atomic::store_ptr (void*    store_value, volatile void*     dest) {*(void**)dest = store_value; }
+
+
+///////////implementation of Atomic::inc*/////////////////
+inline void Atomic::inc	    (volatile jint*	    dest) { (void)add(1, dest); }
+inline void Atomic::inc_ptr (volatile intptr_t*	dest) { (void)inc((volatile jint*)dest); }
+inline void Atomic::inc_ptr (volatile void*     dest) { (void)inc((volatile jint*)dest); }
+
+///////////implementation of Atomic::dec*/////////////////
+inline void Atomic::dec	    (volatile jint*	    dest) { (void)add(-1, dest); }
+inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)dec((volatile jint*)dest); }
+inline void Atomic::dec_ptr (volatile void*     dest) { (void)dec((volatile jint*)dest); } 
+
+
+///////////implementation of Atomic::add*/////////////////
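+// MIPS has no atomic add instruction; the ll/sc pair below retries the
+// load-linked/store-conditional sequence until the sc succeeds, and the
+// function returns the updated value (old value + add_value).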
+inline jint Atomic::add	(jint add_value, volatile jint* dest) {
+	jint __ret, __tmp;
+	__asm__ __volatile__ (
+		" .set push\n"
+		" .set mips2\n"
+		" .set noreorder\n"
+			
+		" 	sync					\n"
+		"1:	ll  	%[__ret], %[__dest]		\n"
+		" 	addu  %[__tmp], %[__val], %[__ret]	\n"
+		" 	sc  	%[__tmp], %[__dest]		\n"
+		" 	beqz  %[__tmp], 1b   			\n"
+		" 	nop					\n"
+		//" 	sync					\n"
+		" .set pop\n"
+
+		: [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp)
+		: [__dest] "m" (*(volatile jint*)dest), [__val] "r" (add_value)
+		: "memory"
+	);
+	
+	return add_value + __ret;
+}
+
+inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) {
+	return (intptr_t)add((jint)add_value, (volatile jint*)dest);
+}
+
+inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) {
+	return (void*)add((jint)add_value, (volatile jint*)dest);
+}
+
+
+///////////implementation of Atomic::xchg*/////////////////
+inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
+	jint __ret, __tmp;
+	
+  __asm__ __volatile__ (
+		" .set push\n"
+		" .set mips2\n"
+		" .set noreorder\n"
+		" 	sync\n"
+		"1:	ll  	%[__ret], %[__dest]	\n"
+		" 	move  %[__tmp], %[__val]	\n"
+		" 	sc  	%[__tmp], %[__dest]	\n"
+		" 	beqz  %[__tmp], 1b		\n"
+		"  	nop				\n"
+		//" 	sync											\n"
+		" .set pop\n"
+
+		: [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp)
+		: [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value)
+		: "memory"
+	);
+
+	return __ret;
+}
+
+inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
+	return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
+}
+
+inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
+	  return (void*)xchg((jint)exchange_value, (volatile jint*)dest);
+}
+
+///////////implementation of Atomic::cmpxchg*/////////////////
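+// The ll/sc loop below implements compare-and-swap: the value at dest is
+// load-linked, compared with compare_value, and exchange_value is written with a
+// conditional store only when they match; the previously observed value is
+// returned either way.  Note that with .set noreorder the instruction following
+// each branch sits in its delay slot and executes on both paths.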
+inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) {
+	jint __prev, __cmp;
+	
+	__asm__ __volatile__ (
+		"  .set push\n"						
+		"  .set mips2\n"						
+		"  .set noreorder\n"
+		
+		"  sync \n"
+		"1:ll   %[__prev], %[__dest]    \n"
+		"  bne  %[__prev], %[__old], 2f	\n"	
+		"  move	%[__cmp], $0	\n"
+		"  move	%[__cmp], %[__new]	\n"
+		"  sc	%[__cmp], %[__dest]	\n"
+		"  beqz	%[__cmp], 1b		\n"
+		"  nop				\n"
+		"2:				\n"
+		
+		"	.set pop"
+
+		: [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp)	
+		: [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value),	[__new] "r" (exchange_value)
+		: "memory"
+	);
+
+	return __prev;
+}
+
+inline intptr_t Atomic::cmpxchg_ptr (intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) {
+	return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value);
+}
+
+inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) {
+	  return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value);
+}
+
+inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) {
+	jlong __ret;
+	jint __cmp, __new, __old, __prev;
+
+	__asm__ __volatile__ (
+		" .set push\n"
+		" .set mips3\n"
+		" .set noreorder\n"
+
+		"   sync                          \n"
+		"   ld    %[__old], %[__oa]       \n"
+		"   ld    %[__new], %[__na]       \n"
+		"1: lld   %[__prev], %[__dest]    \n"
+		"   bne   %[__prev], %[__old], 2f \n"
+		"   move  %[__cmp],  $0           \n"
+		"   move  %[__cmp], %[__new]      \n"
+		"   scd   %[__cmp], %[__dest]     \n"
+		"   beqz  %[__cmp], 1b            \n"
+		"   nop                           \n"
+		//"   sync                          \n"
+		"2:                               \n"
+		"   sd    %[__prev], %[__ret]     \n"
+
+		"	.set pop"
+		
+		: [__cmp] "=&r" (__cmp), 
+			[__old] "=&r" (__old), 
+			[__new] "=&r" (__new), 
+			[__prev] "=&r" (__prev)
+		: [__dest] "m" (*(volatile jint*)dest),
+			[__oa] "m" (*(volatile jint*)&compare_value),
+			[__na] "m" (*(volatile jint*)&exchange_value),
+			[__ret] "m" (*(volatile jint*)&__ret)
+		: "memory"
+	);
+
+	return __ret;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include <byteswap.h>
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
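+// For example, swap_u2(0x1234) == 0x3412 and swap_u4(0x11223344) == 0x44332211.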
+inline u2   Bytes::swap_u2(u2 x) {
+  return ((u2)(x>>8)) | ((u2)(x<<8));
+}
+
+inline u4   Bytes::swap_u4(u4 x) {
+	return (swap_u2(x)<<16) | (swap_u2(x>>16)); 
+}
+
+inline u8 Bytes::swap_u8(u8 x) {
+	return (((u8)swap_u4(x))<<32) | swap_u4(x>>32);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2003-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  (void)memmove(to, from, count * HeapWordSize);
+}
+
+static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  switch (count) {
+  case 8:  to[7] = from[7];
+  case 7:  to[6] = from[6];
+  case 6:  to[5] = from[5];
+  case 5:  to[4] = from[4];
+  case 4:  to[3] = from[3];
+  case 3:  to[2] = from[2];
+  case 2:  to[1] = from[1];
+  case 1:  to[0] = from[0];
+  case 0:  break;
+  default:
+    (void)memcpy(to, from, count * HeapWordSize);
+    break;
+  }
+}
+
+static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
+  // pd_disjoint_words is word-atomic in this implementation.
+  pd_disjoint_words(from, to, count);
+}
+
+static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  (void)memmove(to, from, count * HeapWordSize);
+}
+
+static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  pd_disjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(void* from, void* to, size_t count) {
+  (void)memmove(to, from, count);
+}
+
+static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
+  pd_conjoint_bytes(from, to, count);
+}
+
+static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
+  (void)memmove(to, from, count << LogBytesPerShort);
+}
+
+static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
+  assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
+  // pd_conjoint_words is word-atomic in this implementation.
+  pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
+}
+
+// Use the ".set mips3" directive: the Godson-2 (gs2) is actually a 64-bit CPU,
+// so 64-bit ld/sd are available even in this 32-bit port.
+// by yjl 4/27/2005
+static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
+  if (count == 0) return;
+
+  jint tmp;
+  jlong* _from  = from;
+  jlong* _to    = to;
+  size_t _count = count;
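+
+  // Copy one jlong per iteration with 64-bit ld/sd so that each element is
+  // moved atomically; a byte-wise copy could tear 64-bit values on this
+  // 32-bit ABI.  Note: this forward loop does not handle the overlapping
+  // case where to > from (FIXME).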
+  __asm__ __volatile__ (
+    " .set push\n"
+    " .set mips3\n"
+    " .set noreorder\n"
+
+    "1: ld    %[__tmp], 0(%[__src])   \n"
+    "   addi  %[__cnt], %[__cnt], -1  \n"
+    "   sd    %[__tmp], 0(%[__dst])   \n"
+    "   addiu %[__src], %[__src], 8   \n"
+    "   bne   %[__cnt], $0, 1b        \n"
+    "   addiu %[__dst], %[__dst], 8   \n"
+    " .set pop\n"
+    : [__src] "=&r" (_from),
+      [__dst] "=&r" (_to),
+      [__cnt] "=&r" (_count),
+      [__tmp] "=&r" (tmp)
+    : "[__src]" (_from), "[__dst]" (_to), "[__cnt]" (_count)
+    : "memory"
+  );
+}
+
+static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
+  assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
+  // pd_conjoint_words is word-atomic in this implementation.
+  pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
+}
+
+static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_bytes(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count);
+}
+
+static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
+}
+
+static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
+}
+
+static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_oops_atomic((oop*)from, (oop*)to, count);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2000-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+//
+define_pd_global(bool, DontYieldALot,            false);
+// ThreadStackSize 320 allows TaggedStackInterpreter and a couple of test cases
+// to run while keeping the number of threads that can be created high.
+// System default ThreadStackSize appears to be 512 which is too big.
+define_pd_global(intx, ThreadStackSize,          320);
+define_pd_global(intx, VMThreadStackSize,        512);
+
+define_pd_global(intx, CompilerThreadStackSize,  0);
+define_pd_global(intx, SurvivorRatio,            8);
+
+define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
+
+// Only used on 64 bit Windows platforms
+define_pd_global(bool, UseVectoredExceptions,    false);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,158 @@
+//
+// Copyright 2003-2006 Sun Microsystems, Inc.  All Rights Reserved.
+// Copyright 2010 Lemote, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+// CA 95054 USA or visit www.sun.com if you need additional information or
+// have any questions.
+//
+//
+
+// mips32/godson2 Linux Architecture Description File
+
+//----------OS-DEPENDENT ENCODING BLOCK----------------------------------------
+// This block specifies the encoding classes used by the compiler to
+// output byte streams.  Encoding classes generate functions which are
+// called by Machine Instruction Nodes in order to generate the bit
+// encoding of the instruction.  Operands specify their base encoding
+// interface with the interface keyword.  There are currently
+// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
+// COND_INTER.  REG_INTER causes an operand to generate a function
+// which returns its register number when queried.  CONST_INTER causes
+// an operand to generate a function which returns the value of the
+// constant when queried.  MEMORY_INTER causes an operand to generate
+// four functions which return the Base Register, the Index Register,
+// the Scale Value, and the Offset Value of the operand when queried.
+// COND_INTER causes an operand to generate six functions which return
+// the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional
+// instruction.  Instructions specify two basic values for encoding.
+// They use the ins_encode keyword to specify their encoding class
+// (which must be one of the class names specified in the encoding
+// block), and they use the opcode keyword to specify, in order, their
+// primary, secondary, and tertiary opcode.  Only the opcode sections
+// which a particular instruction needs for encoding need to be
+// specified.
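+//
+// For example, an instruction below names its encoding class with
+//   ins_encode( linux_breakpoint );
+// and the adlc expands that into the C++ body given for that enc_class.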
+encode %{
+  // Build emit functions for each basic byte or larger field in the intel
+  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
+  // code in the enc_class source block.  Emit functions will live in the
+  // main source block for now.  In future, we can generalize this by
+  // adding a syntax that specifies the sizes of fields in an order,
+  // so that the adlc can build the emit functions automagically
+
+  enc_class Java_To_Runtime(method meth)
+  %{
+  %}
+
+  enc_class linux_breakpoint
+  %{
+    MacroAssembler* masm = new MacroAssembler(&cbuf);    
+    masm->call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type);
+  %}
+
+  enc_class call_epilog
+  %{
+    if (VerifyStackAtCalls) {
+      // Check that stack depth is unchanged: find majik cookie on stack
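+      // NOTE: the byte sequences emitted below appear to be carried over
+      // unchanged from the x86 ADL file (cmp [esp+off], 0xbadb100d); they
+      // have not been translated to MIPS instructions yet.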
+      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-2));
+      if(framesize >= 128) {
+	emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb100d
+	emit_d8(cbuf,0xBC);
+	emit_d8(cbuf,0x24);
+	emit_d32(cbuf,framesize); // Find majik cookie from ESP
+	emit_d32(cbuf, 0xbadb100d);
+      }
+      else {
+	emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb100d
+	emit_d8(cbuf,0x7C);
+	emit_d8(cbuf,0x24);
+	emit_d8(cbuf,framesize); // Find majik cookie from ESP
+	emit_d32(cbuf, 0xbadb100d);
+      }
+      // jmp EQ around INT3
+      // QQQ TODO
+      const int jump_around = 5; // size of call to breakpoint, 1 for CC
+      emit_opcode(cbuf, 0x74);
+      emit_d8(cbuf, jump_around);
+      // QQQ temporary
+      emit_break(cbuf);
+      // Die if stack mismatch
+      // emit_opcode(cbuf,0xCC);
+    }
+  %}
+
+%}
+
+// INSTRUCTIONS -- Platform dependent
+
+//----------OS and Locking Instructions----------------------------------------
+
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
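+// NOTE: this definition still uses the ia32 register classes (eAXRegP,
+// eFlagsReg) and references a linux_tlsencode enc_class that is not defined
+// in this file; it has not been ported to MIPS yet.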
+instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{
+  match(Set dst (ThreadLocal));
+  effect(DEF dst, KILL cr);
+
+  format %{ "MOV    EAX, Thread::current()" %}
+  ins_encode( linux_tlsencode(dst) );
+  ins_pipe( ialu_reg_fat );
+%}
+
+// Die now
+instruct ShouldNotReachHere()
+%{
+  match(Halt);
+
+  // Use the following format syntax
+  format %{ "int3\t# ShouldNotReachHere" %}
+  // QQQ TODO for now call breakpoint
+  // opcode(0xCC);
+  // ins_encode(Opc);
+  ins_encode(linux_breakpoint);
+  ins_pipe(pipe_slow);
+%}
+
+
+// Platform dependent source
+
+source
+%{
+// emit an interrupt that is caught by the debugger
+void emit_break(CodeBuffer& cbuf) {
+  // Debugger doesn't really catch this but best we can do so far QQQ
+  MacroAssembler masm(&cbuf);
+#define __ masm.
+  __ lui(T9, Assembler::split_high((int)os::breakpoint));
+  __ addiu(T9, T9, Assembler::split_low((int)os::breakpoint));
+  __ jalr(T9);
+  __ delayed()->nop();
+#undef __
+}
+
+void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  emit_break(cbuf);
+}
+
+uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
+  return 16;
+}
+
+%}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,22 @@
+# 
+# Copyright 2004-2007 Sun Microsystems, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Implementation of class OrderAccess.
+
+inline void OrderAccess::loadload()   { acquire(); }
+inline void OrderAccess::storestore() { release(); }
+inline void OrderAccess::loadstore()  { acquire(); }
+inline void OrderAccess::storeload()  { fence(); }
+
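+// acquire(), release() and fence() below are all implemented with a full
+// MIPS "sync"; that is stronger than strictly required for the
+// acquire/release cases, but always correct.
+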
+// Ensure that all loads issued before this point have completed.
+inline void OrderAccess::acquire() {
+	__asm__ __volatile__(
+			" .set push\n"
+			" .set mips2\n"
+			" 	sync\n"
+			" .set pop\n"
+	);
+}
+
+// Ensure that all stores issued before this point have completed.
+inline void OrderAccess::release() {
+	__asm__ __volatile__(
+			" .set push\n"
+			" .set mips2\n"
+			" 	sync\n"
+			" .set pop\n"
+	);
+}
+
+// Full barrier: orders prior stores against subsequent loads (and everything else).
+inline void OrderAccess::fence() {
+	__asm__ __volatile__(
+			" .set push\n"
+			" .set mips2\n"
+			" 	sync\n"
+			" .set pop\n"
+	);
+}
+
+//implementation of load_acquire
+inline jbyte    OrderAccess::load_acquire(volatile jbyte*   p) { return *p; }
+inline jshort   OrderAccess::load_acquire(volatile jshort*  p) { return *p; }
+inline jint     OrderAccess::load_acquire(volatile jint*    p) { return *p; }
+inline jlong    OrderAccess::load_acquire(volatile jlong*   p) { return *p; }
+inline jubyte   OrderAccess::load_acquire(volatile jubyte*  p) { return *p; }
+inline jushort  OrderAccess::load_acquire(volatile jushort* p) { return *p; }
+inline juint    OrderAccess::load_acquire(volatile juint*   p) { return *p; }
+inline julong   OrderAccess::load_acquire(volatile julong*  p) { return *p; }
+inline jfloat   OrderAccess::load_acquire(volatile jfloat*  p) { return *p; }
+inline jdouble  OrderAccess::load_acquire(volatile jdouble* p) { return *p; }
+
+//implementation of load_ptr_acquire
+inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t*   p) { return *p; }
+inline void*    OrderAccess::load_ptr_acquire(volatile void*       p) { return *(void* volatile *)p; }
+inline void*    OrderAccess::load_ptr_acquire(const volatile void* p) { return *(void* const volatile *)p; }
+
+//implementation of release_store
+inline void     OrderAccess::release_store(volatile jbyte*   p, jbyte   v) { *p = v; }
+inline void     OrderAccess::release_store(volatile jshort*  p, jshort  v) { *p = v; }
+inline void     OrderAccess::release_store(volatile jint*    p, jint    v) { *p = v; }
+inline void     OrderAccess::release_store(volatile jlong*   p, jlong   v) { *p = v; }
+inline void     OrderAccess::release_store(volatile jubyte*  p, jubyte  v) { *p = v; }
+inline void     OrderAccess::release_store(volatile jushort* p, jushort v) { *p = v; }
+inline void     OrderAccess::release_store(volatile juint*   p, juint   v) { *p = v; }
+inline void     OrderAccess::release_store(volatile julong*  p, julong  v) { *p = v; }
+inline void     OrderAccess::release_store(volatile jfloat*  p, jfloat  v) { *p = v; }
+inline void     OrderAccess::release_store(volatile jdouble* p, jdouble v) { *p = v; }
+
+//implementation of release_store_ptr
+inline void     OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { *p = v; }
+inline void     OrderAccess::release_store_ptr(volatile void*     p, void*    v) { *(void* volatile *)p = v; }
+
+//implementation of store_fence
+inline void     OrderAccess::store_fence(jbyte*   p, jbyte   v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(jshort*  p, jshort  v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(jint*    p, jint    v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(jlong*   p, jlong   v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(jubyte*  p, jubyte  v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(juint*   p, juint   v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(julong*  p, julong  v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(jfloat*  p, jfloat  v) { *p = v; fence(); }
+inline void     OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); }
+
+//implementation of store_ptr_fence
+inline void     OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); }
+inline void     OrderAccess::store_ptr_fence(void**    p, void*    v) { *p = v; fence(); }
+
+//implementation of release_store_fence
+inline void     OrderAccess::release_store_fence(volatile jbyte*   p, jbyte   v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile jshort*  p, jshort  v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile jint*    p, jint    v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile jlong*   p, jlong   v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile jubyte*  p, jubyte  v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile jushort* p, jushort v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile juint*   p, juint   v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { *p = v; fence(); }
+
+//implementation of release_store_ptr_fence
+inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { *p = v; fence(); }
+inline void     OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) { *(void* volatile *)p = v; fence(); }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,730 @@
+/*
+ * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// do not include precompiled header file
+# include "incls/_os_linux_mips.cpp.incl"
+
+// put OS-includes here
+# include <sys/types.h>
+# include <sys/mman.h>
+# include <pthread.h>
+# include <signal.h>
+# include <errno.h>
+# include <dlfcn.h>
+# include <stdlib.h>
+# include <stdio.h>
+# include <unistd.h>
+# include <sys/resource.h>
+# include <pthread.h>
+# include <sys/stat.h>
+# include <sys/time.h>
+# include <sys/utsname.h>
+# include <sys/socket.h>
+# include <sys/wait.h>
+# include <pwd.h>
+# include <poll.h>
+# include <ucontext.h>
+# include <fpu_control.h>
+
+#define REG_SP 29
+#define REG_FP 30
+
+address os::current_stack_pointer() {
+  register void* sp __asm__ ("$29");
+  return (address) sp;
+}
+
+char* os::non_memory_address_word() {
+  // Must never look like an address returned by reserve_memory,
+  // even in its subfields (as defined by the CPU immediate fields,
+  // if the CPU splits constants across multiple instructions).
+
+  return (char*) -1;
+}
+
+void os::initialize_thread() {
+// Nothing to do.
+}
+
+// The next three methods exist only in os::Linux, not in the other OS classes. by yjl 6/21/2005
+address os::Linux::ucontext_get_pc(ucontext_t * uc) {
+  // The MIPS mcontext_t exposes the PC directly as uc_mcontext.pc rather
+  // than through gregs[].
+  return (address)uc->uc_mcontext.pc;
+}
+
+intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) {
+  return (intptr_t*)uc->uc_mcontext.gregs[REG_SP];
+}
+
+intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) {
+  return (intptr_t*)uc->uc_mcontext.gregs[REG_FP];
+}
+
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread
+// is currently interrupted by SIGPROF.
+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal
+// frames. Currently we don't do that on Linux, so it's the same as
+// os::fetch_frame_from_context().
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread,
+  ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) {
+
+  assert(thread != NULL, "just checking");
+  assert(ret_sp != NULL, "just checking");
+  assert(ret_fp != NULL, "just checking");
+
+  return os::fetch_frame_from_context(uc, ret_sp, ret_fp);
+}
+
+ExtendedPC os::fetch_frame_from_context(void* ucVoid,
+                    intptr_t** ret_sp, intptr_t** ret_fp) {
+
+  ExtendedPC  epc;
+  ucontext_t* uc = (ucontext_t*)ucVoid;
+
+  if (uc != NULL) {
+    epc = ExtendedPC(os::Linux::ucontext_get_pc(uc));
+    if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc);
+    if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc);
+  } else {
+    // construct empty ExtendedPC for return value checking
+    epc = ExtendedPC(NULL);
+    if (ret_sp) *ret_sp = (intptr_t *)NULL;
+    if (ret_fp) *ret_fp = (intptr_t *)NULL;
+  }
+
+  return epc;
+}
+
+frame os::fetch_frame_from_context(void* ucVoid) {
+  intptr_t* sp;
+  intptr_t* fp;
+  ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp);
+  return frame(sp, fp, epc.pc());
+}
+
+// By default, gcc always saves the frame pointer on the stack.  This may be
+// turned off by -fomit-frame-pointer.
+frame os::get_sender_for_C_frame(frame* fr) {
+  return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
+}
+
+// See StubGenerator::generate_get_previous_fp in stubGenerator_gs2.cpp.
+jint* os::get_previous_fp() {
+	int *pc;
+	int sp;
+	int *pc_limit = (int*)(void*)&os::get_previous_fp;
+	int insn;
+
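+	// Scan backwards from the current PC for the "addiu/daddiu sp, sp, -imm"
+	// instruction that allocated this frame; its (negative) immediate is the
+	// frame size, so the caller's stack/frame pointer is sp + |imm|.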
+	{
+l_pc:;
+		 pc = (int*)&&l_pc;
+		 __asm__ __volatile__ ("move %0,  $sp" : "=r" (sp));
+	}
+
+	do {
+		--pc;
+		insn = *pc;
+		switch(bitfield(insn, 16, 16)) {
+		case 0x27bd:	/* addiu $sp,$sp,-i */
+		case 0x23bd:	/* addi $sp,$sp,-i */
+		case 0x67bd:	/* daddiu $sp,$sp,-i */
+		case 0x63bd:	/* daddi $sp,$sp,-i */
+			assert ((short)bitfield(insn, 0, 16)<0, "bad frame");
+			sp -=	(short)bitfield(insn, 0, 16);
+			return (jint*)sp;
+		}
+	} while (pc>pc_limit);
+
+	ShouldNotReachHere();
+	return NULL;
+}
+
+frame os::current_frame() {
+  frame myframe((intptr_t*)os::current_stack_pointer(), 
+                (intptr_t*)get_previous_fp(),
+                CAST_FROM_FN_PTR(address, os::current_frame));
+  if (os::is_first_C_frame(&myframe)) {
+    // stack is not walkable
+    return frame(NULL, NULL, NULL);
+  } else {
+    return os::get_sender_for_C_frame(&myframe);
+  }
+}
+
+// The x86 port adds two extra assembler helper functions here.
+extern "C" int 
+JVM_handle_linux_signal(int sig,
+                        siginfo_t* info,
+                        void* ucVoid,
+                        int abort_if_unrecognized) {
+  tty->print_cr("signal information:\n"
+                "  signo   = %x,\n"
+                "  sicode  = %x,\n"
+                "  sierrno = %x,\n"
+                "  siaddr  = %x,\n",
+                info->si_signo,
+                info->si_code,
+                info->si_errno,
+                info->si_addr);
+
+  ucontext_t* uc = (ucontext_t*) ucVoid;
+
+  Thread* t = ThreadLocalStorage::get_thread_slow();
+
+  SignalHandlerMark shm(t);
+
+  // Note: it's not uncommon that JNI code uses signal/sigset to install
+  // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
+  // or have a SIGILL handler when detecting CPU type). When that happens,
+  // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
+  // avoid unnecessary crash when libjsig is not preloaded, try handle signals
+  // that do not require siginfo/ucontext first.
+
+  //if (sig == SIGPIPE || sig == SIGXFSZ) {
+  if (sig == SIGPIPE) {
+    // allow chained handler to go first
+    if (os::Linux::chained_handler(sig, info, ucVoid)) {
+      return true;
+    } else {
+      if (PrintMiscellaneous && (WizardMode || Verbose)) {
+        warning("Ignoring SIGPIPE - see bug 4229104");
+      }
+      return true;
+    }
+  }
+
+  JavaThread* thread = NULL;
+  VMThread* vmthread = NULL;
+  if (os::Linux::signal_handlers_are_installed) {
+    if (t != NULL ){
+      if(t->is_Java_thread()) {
+	tty->print_cr("this thread is a java thread\n");
+        thread = (JavaThread*)t;
+      }
+      else if(t->is_VM_thread()){
+	tty->print_cr("this thread is a VM thread\n");
+        vmthread = (VMThread *)t;
+      }
+    }
+  }
+
+  // decide if this trap can be handled by a stub
+  address stub = NULL;
+  address pc   = NULL;
+
+  pc = (address) os::Linux::ucontext_get_pc(uc);
+  tty->print_cr("pc=%x", pc);
+  //%note os_trap_1
+  if (info != NULL && uc != NULL && thread != NULL) {
+    pc = (address) os::Linux::ucontext_get_pc(uc);
+    // Handle ALL stack overflow variations here
+    if (sig == SIGSEGV) {
+      address addr = (address) info->si_addr;
+      // check if fault address is within thread stack
+      printf("handle all stack overflow variations\n");
+      printf("addr = %x, stack base = %x, stack top = %x\n", 
+	      addr, 
+	      thread->stack_base(), 
+	      thread->stack_base() - thread->stack_size());
+
+      if (addr < thread->stack_base() &&
+          addr >= thread->stack_base() - thread->stack_size()) {
+        // stack overflow
+        printf("stack exception check \n");
+        if (thread->in_stack_yellow_zone(addr)) {
+	  printf("exception addr is in yellow zone\n");
+          thread->disable_stack_yellow_zone();
+          if (thread->thread_state() == _thread_in_Java) {
+            // Throw a stack overflow exception.  Guard pages will be reenabled
+            // while unwinding the stack.
+	    printf("this thread is in java\n");
+            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
+          } else {
+            // Thread was in the vm or native code.  Return and try to finish.
+	    printf("this thread is in vm or native codes and return\n");
+            return 1;
+          }
+        } else if (thread->in_stack_red_zone(addr)) {
+          // Fatal red zone violation.  Disable the guard pages and fall through
+          // to handle_unexpected_exception way down below.
+	  printf("exception addr is in red zone\n");
+          thread->disable_stack_red_zone();
+          tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+        } else {
+          // Accessing stack address below sp may cause SEGV if current
+          // thread has MAP_GROWSDOWN stack. This should only happen when
+          // current thread was created by user code with MAP_GROWSDOWN flag
+          // and then attached to VM. See notes in os_linux.cpp.
+	  printf("exception addr is neither in yellow zone nor in the red one\n");
+          if (thread->osthread()->expanding_stack() == 0) {
+             thread->osthread()->set_expanding_stack();
+             if (os::Linux::manually_expand_stack(thread, addr)) {
+               thread->osthread()->clear_expanding_stack();
+               return 1;
+             }
+             thread->osthread()->clear_expanding_stack();
+          } else {
+             fatal("recursive segv. expanding stack.");
+          }
+        }
+      } //addr <
+    } //sig == SIGSEGV
+
+    if (thread->thread_state() == _thread_in_Java) {
+      // Java thread running in Java code => find exception handler if any
+      // a fault inside compiled code, the interpreter, or a stub
+      tty->print("java thread running in java code\n");
+      tty->print_cr("polling address = %x", os::get_polling_page());
+      if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
+
+        stub = SharedRuntime::get_poll_stub(pc);
+      } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
+        // BugId 4454115: A read from a MappedByteBuffer can fault
+        // here if the underlying file has been truncated.
+        // Do not crash the VM in such a case.
+        CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+        nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL;
+        printf("cb = %x, nm = %x\n", cb, nm);
+        if (nm != NULL && nm->has_unsafe_access()) {
+          stub = StubRoutines::handler_for_unsafe_access();
+        }
+      } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) {
+        // HACK: si_code does not work on linux 2.2.12-20!!!
+        int op = pc[0] & 0x3f;
+	int op1 = pc[3] & 0x3f;
+      	//FIXME, Must port to mips code!! 
+        switch (op) {
+          case 0x1e:	//ddiv
+          case 0x1f:	//ddivu
+          case 0x1a:	//div
+          case 0x1b:	//divu
+            stub = SharedRuntime::continuation_for_implicit_exception(thread, 
+                                    pc, 
+                                    SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO);
+	    break;
+          default:
+          // TODO: handle more cases if we are using other MIPS instructions
+          //   that can generate SIGFPE signal on linux.
+          tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1);
+          //fatal("please update this code.");
+      	}
+      }
+     else if (sig == SIGSEGV &&
+               !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) {
+          // Determination of interpreter/vtable stub/compiled code null exception
+          printf("continuation for implicit exception\n");
+          stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
+      }
+    } else if (thread->thread_state() == _thread_in_vm &&
+               sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
+               thread->doing_unsafe_access()) {
+	tty->print_cr("SIGBUS in vm thread \n");
+        stub = StubRoutines::handler_for_unsafe_access();
+    }
+
+    // jni_fast_Get<Primitive>Field can trap at certain pc's if a GC kicks in
+    // and the heap gets shrunk before the field access.
+    if ((sig == SIGSEGV) || (sig == SIGBUS)) {
+	printf("jni fast get trap\n");
+      address addr = JNI_FastGetField::find_slowcase_pc(pc);
+      if (addr != (address)-1) {
+        stub = addr;
+      }
+      tty->print_cr("addr = %d, stub = %d\n", addr, stub);
+    }
+
+    // Check to see if we caught the safepoint code in the
+    // process of write protecting the memory serialization page.
+    // It write enables the page immediately after protecting it
+    // so we can just return to retry the write.
+    if ((sig == SIGSEGV) &&
+        os::is_memory_serialize_page(thread, (address) info->si_addr)) {
+      // Block current thread until the memory serialize page permission restored.
+      printf("write protecting the memory serialiazation page\n");
+      os::block_on_serialize_page_trap();
+      return true;
+    }
+  }
+
+  // Execution protection violation
+  //
+  // This should be kept as the last step in the triage.  We don't
+  // have a dedicated trap number for a no-execute fault, so be
+  // conservative and allow other handlers the first shot.
+  //
+  // Note: We don't test that info->si_code == SEGV_ACCERR here.
+  // this si_code is so generic that it is almost meaningless; and
+  // the si_code for this condition may change in the future.
+  // Furthermore, a false-positive should be harmless.
+  if (UnguardOnExecutionViolation > 0 &&
+      //(sig == SIGSEGV || sig == SIGBUS) &&
+      //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) {
+      (sig == SIGSEGV || sig == SIGBUS 
+#ifdef OPT_RANGECHECK
+			 || sig == SIGSYS
+#endif
+			) &&
+			//(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) {
+			(uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) {
+			//aoqi: copy from jdk1.5, dont understand the struct mcontext_t.
+    tty->print_cr("execution protection violation\n");
+       
+    int page_size = os::vm_page_size();
+    address addr = (address) info->si_addr;
+    address pc = os::Linux::ucontext_get_pc(uc);
+    // Make sure the pc and the faulting address are sane.
+    //
+    // If an instruction spans a page boundary, and the page containing
+    // the beginning of the instruction is executable but the following
+    // page is not, the pc and the faulting address might be slightly
+    // different - we still want to unguard the 2nd page in this case.
+    //
+    // 15 bytes seems to be a (very) safe value for max instruction size.
+    bool pc_is_near_addr =
+      (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15);
+    bool instr_spans_page_boundary =
+      (align_size_down((intptr_t) pc ^ (intptr_t) addr,
+                       (intptr_t) page_size) > 0);
+
+    if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) {
+      static volatile address last_addr =
+        (address) os::non_memory_address_word();
+
+      // In conservative mode, don't unguard unless the address is in the VM
+      if (addr != last_addr &&
+          (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) {
+
+        // Set memory to RWX and retry
+        address page_start =
+          (address) align_size_down((intptr_t) addr, (intptr_t) page_size);
+        //bool res = os::protect_memory((char*) page_start, page_size,
+        //                              os::MEM_PROT_RWX);
+        bool res = os::unguard_memory((char*) page_start, page_size);//aoqi:?
+
+        if (PrintMiscellaneous && Verbose) {
+          char buf[256];
+          jio_snprintf(buf, sizeof(buf), "Execution protection violation "
+                       "at " INTPTR_FORMAT
+                       ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr,
+                       page_start, (res ? "success" : "failed"), errno);
+          tty->print_raw_cr(buf);
+        }
+        stub = pc;
+
+        // Set last_addr so if we fault again at the same address, we don't end
+        // up in an endless loop.
+        //
+        // There are two potential complications here.  Two threads trapping at
+        // the same address at the same time could cause one of the threads to
+        // think it already unguarded, and abort the VM.  Likely very rare.
+        //
+        // The other race involves two threads alternately trapping at
+        // different addresses and failing to unguard the page, resulting in
+        // an endless loop.  This condition is probably even more unlikely than
+        // the first.
+        //
+        // Although both cases could be avoided by using locks or thread local
+        // last_addr, these solutions are unnecessary complication: this
+        // handler is a best-effort safety net, not a complete solution.  It is
+        // disabled by default and should only be used as a workaround in case
+        // we missed any no-execute-unsafe VM code.
+
+        last_addr = addr;
+      }
+    }
+  }
+
+  if (stub != NULL) {
+    tty->print_cr("resolved stub=%d\n",stub);
+    // save all thread context in case we need to restore it
+    if (thread != NULL) thread->set_saved_exception_pc(pc);
+
+    uc->uc_mcontext.pc = (greg_t)stub;
+    return true;
+  }
+
+  // signal-chaining
+  if (os::Linux::chained_handler(sig, info, ucVoid)) {
+     tty->print_cr("signal chaining\n");
+     return true;
+  }
+
+  if (!abort_if_unrecognized) {
+    // caller wants another chance, so give it to him
+    tty->print_cr("abort becauce of unrecognized\n");
+    return false;
+  }
+
+  if (pc == NULL && uc != NULL) {
+    pc = os::Linux::ucontext_get_pc(uc);
+  }
+
+  // unmask current signal
+  sigset_t newset;
+  sigemptyset(&newset);
+  sigaddset(&newset, sig);
+  sigprocmask(SIG_UNBLOCK, &newset, NULL);
+  tty->print_cr("VMError in signal handler\n");
+  VMError err(t, sig, pc, info, ucVoid);
+  err.report_and_die();
+
+  ShouldNotReachHere();
+}
+
+void os::Linux::init_thread_fpu_state(void) {
+  // set fpu to 53 bit precision
+  //set_fpu_control_word(0x27f);
+}
+
+int os::Linux::get_fpu_control_word(void) {
+  // Nothing to report on MIPS yet.
+  return 0;
+}
+
+void os::Linux::set_fpu_control_word(int fpu_control) {
+}
+
+bool os::is_allocatable(size_t bytes) {
+
+  if (bytes < 2 * G) {
+    return true;
+  }
+
+  char* addr = reserve_memory(bytes, NULL);
+
+  if (addr != NULL) {
+    release_memory(addr, bytes);
+  }
+
+  return addr != NULL;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+
+size_t os::Linux::min_stack_allowed  = 96 * K;
+
+
+// Test if pthread library can support variable thread stack size. LinuxThreads
+// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads
+// in floating stack mode and NPTL support variable stack size.
+bool os::Linux::supports_variable_stack_size() {
+  if (os::Linux::is_NPTL()) {
+     // NPTL, yes
+     return true;
+
+  } else {
+    // Note: We can't control default stack size when creating a thread.
+    // If we use non-default stack size (pthread_attr_setstacksize), both
+    // floating stack and non-floating stack LinuxThreads will return the
+    // same value. This makes it impossible to implement this function by
+    // detecting thread stack size directly.
+    //
+    // An alternative approach is to check %gs. Fixed-stack LinuxThreads
+    // do not use %gs, so its value is 0. Floating-stack LinuxThreads use
+    // %gs (either as LDT selector or GDT selector, depending on kernel)
+    // to access thread specific data.
+    //
+    // Note that %gs is a reserved glibc register since early 2001, so
+    // applications are not allowed to change its value (Ulrich Drepper from
+    // Red Hat confirmed that all known offenders have been modified to use
+    // either %fs or TSD). In the worst case scenario, when VM is embedded in
+    // a native application that plays with %gs, we might see non-zero %gs
+    // even LinuxThreads is running in fixed stack mode. As the result, we'll
+    // return true and skip _thread_safety_check(), so we may not be able to
+    // detect stack-heap collisions. But otherwise it's harmless.
+    //
+    // FIXME: we should do something here, not just return false. by yjl 6/21/2005
+    return false;
+  }
+}
+
+// return default stack size for thr_type
+// Maybe we need to change this; FIXME by yjl 6/21/2005
+size_t os::Linux::default_stack_size(os::ThreadType thr_type) {
+  // default stack size (compiler thread needs larger stack)
+  size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K);
+  return s;
+}
+
+size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
+  // Creating guard page is very expensive. Java thread has HotSpot
+  // guard page, only enable glibc guard page for non-Java threads.
+  return (thr_type == java_thread ? 0 : page_size());
+}
+
+// Java thread:
+//
+//   Low memory addresses
+//    +------------------------+
+//    |                        |\  JavaThread created by VM does not have glibc
+//    |    glibc guard page    | - guard, attached Java thread usually has
+//    |                        |/  1 page glibc guard.
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+//    |                        |\
+//    |  HotSpot Guard Pages   | - red and yellow pages
+//    |                        |/
+//    +------------------------+ JavaThread::stack_yellow_zone_base()
+//    |                        |\
+//    |      Normal Stack      | -
+//    |                        |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// Non-Java thread:
+//
+//   Low memory addresses
+//    +------------------------+
+//    |                        |\
+//    |  glibc guard page      | - usually 1 page
+//    |                        |/
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
+//    |                        |\
+//    |      Normal Stack      | -
+//    |                        |/
+// P2 +------------------------+ Thread::stack_base()
+//
+// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from
+//    pthread_attr_getstack()
+
+static void current_stack_region(address * bottom, size_t * size) {
+  if (os::Linux::is_initial_thread()) {
+     // initial thread needs special handling because pthread_getattr_np()
+     // may return bogus value.
+     *bottom = os::Linux::initial_thread_stack_bottom();
+     *size   = os::Linux::initial_thread_stack_size();
+  } else {
+     pthread_attr_t attr;
+
+     int rslt = pthread_getattr_np(pthread_self(), &attr);
+
+     // JVM needs to know exact stack location, abort if it fails
+     if (rslt != 0) {
+       if (rslt == ENOMEM) {
+         vm_exit_out_of_memory(0, "pthread_getattr_np");
+       } else {
+         fatal1("pthread_getattr_np failed with errno = %d", rslt);
+       }
+     }
+
+     if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) {
+       fatal("Can not locate current stack attributes!");
+     }
+
+     pthread_attr_destroy(&attr);
+  }
+  assert(os::current_stack_pointer() >= *bottom &&
+         os::current_stack_pointer() < *bottom + *size, "just checking");
+}
+
+address os::current_stack_base() {
+  address bottom;
+  size_t size;
+  current_stack_region(&bottom, &size);
+  return (bottom + size);
+}
+
+size_t os::current_stack_size() {
+  // stack size includes normal stack and HotSpot guard pages
+  address bottom;
+  size_t size;
+  current_stack_region(&bottom, &size);
+  return size;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// helper functions for fatal error handler
+
+void os::print_context(outputStream *st, void *context) {
+  if (context == NULL) return;
+
+  ucontext_t *uc = (ucontext_t*)context;
+  st->print_cr("Registers:");
+  st->print(  "i0=" INTPTR_FORMAT, uc->uc_mcontext.gregs[0]);
+  st->print(", i1=" INTPTR_FORMAT, uc->uc_mcontext.gregs[1]);
+  st->print(", i2=" INTPTR_FORMAT, uc->uc_mcontext.gregs[2]);
+  st->print(", i3=" INTPTR_FORMAT, uc->uc_mcontext.gregs[3]);
+  st->cr();
+  st->print(  "i4=" INTPTR_FORMAT, uc->uc_mcontext.gregs[4]);
+  st->print(", i5=" INTPTR_FORMAT, uc->uc_mcontext.gregs[5]);
+  st->print(", i6=" INTPTR_FORMAT, uc->uc_mcontext.gregs[6]);
+  st->print(", i7=" INTPTR_FORMAT, uc->uc_mcontext.gregs[7]);
+  st->cr();
+  st->print(  "i8=" INTPTR_FORMAT, uc->uc_mcontext.gregs[8]);
+  st->print(", i9=" INTPTR_FORMAT, uc->uc_mcontext.gregs[9]);
+  st->print(", i10=" INTPTR_FORMAT, uc->uc_mcontext.gregs[10]);
+  st->print(", i11=" INTPTR_FORMAT, uc->uc_mcontext.gregs[11]);
+  st->cr();
+  st->print(  "i12=" INTPTR_FORMAT, uc->uc_mcontext.gregs[12]);
+  st->print(", i13=" INTPTR_FORMAT, uc->uc_mcontext.gregs[13]);
+  st->print(", i14=" INTPTR_FORMAT, uc->uc_mcontext.gregs[14]);
+  st->print(", i15=" INTPTR_FORMAT, uc->uc_mcontext.gregs[15]);
+  st->cr();
+  st->print(  "i16=" INTPTR_FORMAT, uc->uc_mcontext.gregs[16]);
+  st->print(", i17=" INTPTR_FORMAT, uc->uc_mcontext.gregs[17]);
+  st->print(", i18=" INTPTR_FORMAT, uc->uc_mcontext.gregs[18]);
+  st->print(", i19=" INTPTR_FORMAT, uc->uc_mcontext.gregs[19]);
+  st->cr();
+  st->print(  "i20=" INTPTR_FORMAT, uc->uc_mcontext.gregs[20]);
+  st->print(", i21=" INTPTR_FORMAT, uc->uc_mcontext.gregs[21]);
+  st->print(", i22=" INTPTR_FORMAT, uc->uc_mcontext.gregs[22]);
+  st->print(", i23=" INTPTR_FORMAT, uc->uc_mcontext.gregs[23]);
+  st->cr();
+  st->print(  "i24=" INTPTR_FORMAT, uc->uc_mcontext.gregs[24]);
+  st->print(", i25=" INTPTR_FORMAT, uc->uc_mcontext.gregs[25]);
+  st->print(", i26=" INTPTR_FORMAT, uc->uc_mcontext.gregs[26]);
+  st->print(", i27=" INTPTR_FORMAT, uc->uc_mcontext.gregs[27]);
+  st->cr();
+  st->print(  "i28=" INTPTR_FORMAT, uc->uc_mcontext.gregs[28]);
+  st->print(", i29=" INTPTR_FORMAT, uc->uc_mcontext.gregs[29]);
+  st->print(", i30=" INTPTR_FORMAT, uc->uc_mcontext.gregs[30]);
+  st->print(", i31=" INTPTR_FORMAT, uc->uc_mcontext.gregs[31]);
+  st->cr();
+  st->cr();
+
+  intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
+  st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", sp);
+  //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t));
+  print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t));
+  st->cr();
+
+  // Note: it may be unsafe to inspect memory near pc. For example, pc may
+  // point to garbage if entry point in an nmethod is corrupted. Leave
+  // this at the end, and hope for the best.
+  address pc = os::Linux::ucontext_get_pc(uc);
+  st->print_cr("Instructions: (pc=" PTR_FORMAT ")", pc);
+  print_hex_dump(st, pc - 16, pc + 16, sizeof(char));
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright 1999-2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+  static void setup_fpu() {}
+  static bool is_allocatable(size_t bytes);
+  static jint* get_previous_fp();
+
+  // Used to register dynamic code cache area with the OS
+  // Note: Currently only used in 64 bit Windows implementations
+  static bool register_code_area(char *low, char *high) { return true; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+
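+// Prefetching is not implemented in this port yet, so both hints are
+// deliberate no-ops.  (The MIPS "pref" instruction could eventually be used
+// here where the target CPU supports it.)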
+inline void Prefetch::read (void *loc, intx interval) {
+}
+
+inline void Prefetch::write(void *loc, intx interval) {
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,91 @@
+/*
+ * Copyright 1999-2003 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_threadLS_linux_mips.cpp.incl"
+
+// Map stack pointer ($sp) to thread pointer for faster TLS access
+//
+// Here we use a flat table for better performance. Getting current thread
+// is down to one memory access (read _sp_map[$sp >> 12]) in generated code
+// and two in runtime code (-fPIC code needs an extra load for _sp_map).
+//
+// This code assumes stack page is not shared by different threads. It works
+// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters).
+//
+// Notice that _sp_map is allocated in the bss segment, which is ZFOD
+// (zero-fill-on-demand). While it reserves 4M address space upfront,
+// actual memory pages are committed on demand.
+//
+// If an application creates and destroys a lot of threads, usually the
+// stack space freed by a thread will soon get reused by new thread
+// (this is especially true in NPTL or LinuxThreads in fixed-stack mode).
+// No memory page in _sp_map is wasted.
+//
+// However, it's still possible that we might end up populating &
+// committing a large fraction of the 4M table over time, but the actual
+// amount of live data in the table could be quite small. The max wastage
+// is less than 4M bytes. If it becomes an issue, we could use madvise()
+// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map.
+// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the
+// physical memory page (i.e. similar to MADV_FREE on Solaris).
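+//
+// With this scheme the current thread is recovered from the stack pointer
+// with a single indexed load, e.g.
+//   Thread* t = _sp_map[(uintptr_t)sp >> PAGE_SHIFT];
+// (see ThreadLocalStorage::thread() in threadLS_linux_mips.hpp).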
+
+Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)];
+int ThreadLocalStorage::_sp_map_low;
+int ThreadLocalStorage::_sp_map_high;
+
+void ThreadLocalStorage::generate_code_for_get_thread() {
+    // nothing we can do here for user-level thread
+}
+
+void ThreadLocalStorage::pd_init() {
+  assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(),
+         "page size must be multiple of PAGE_SIZE");
+
+  _sp_map_low  = Assembler::split_low((int)_sp_map);
+  _sp_map_high = Assembler::split_high((int)_sp_map);
+}
+
+void ThreadLocalStorage::pd_set_thread(Thread* thread) {
+  os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);
+
+  address stack_top = os::current_stack_base();
+  size_t stack_size = os::current_stack_size();
+
+  for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) {
+    // pd_set_thread() is called with non-NULL value when a new thread is
+    // created/attached, or with NULL value when a thread is about to exit.
+    // If both "thread" and the corresponding _sp_map[] entry are non-NULL,
+    // they should have the same value. Otherwise it might indicate that the
+    // stack page is shared by multiple threads. However, a more likely cause
+    // for this assertion to fail is that an attached thread exited without
+    // detaching itself from VM, which is a program error and could cause VM
+    // to crash.
+    assert(thread == NULL || _sp_map[(uintptr_t)p >> PAGE_SHIFT] == NULL ||
+           thread == _sp_map[(uintptr_t)p >> PAGE_SHIFT],
+           "thread exited without detaching from VM??");
+    _sp_map[(uintptr_t)p >> PAGE_SHIFT] = thread;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2003 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2007-2008 Reservoir Labs, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *  
+ */
+
+// Processor dependent parts of ThreadLocalStorage
+// Only the low 2G of the address space is available to user programs on
+// Linux, hence SP_BITLENGTH == 31.
+
+#define SP_BITLENGTH  31
+#define PAGE_SHIFT    12
+#define PAGE_SIZE     (1UL << PAGE_SHIFT)
+  
+static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; 
+static int _sp_map_low;
+static int _sp_map_high;
+			  
+public:
+  static Thread** sp_map_addr() { return _sp_map; }
+  static int sp_map_low() { return _sp_map_low; }
+  static int sp_map_high() { return _sp_map_high; }
+
+  static Thread* thread() {
+    uintptr_t sp;
+    __asm__ volatile ("addi %0, $29, 0" : "=r" (sp));
+    return _sp_map[sp >> PAGE_SHIFT];
+  }
+/*
+public:
+
+  static Thread* thread()
+  {
+    return (Thread*) os::thread_local_storage_at(thread_index());
+  }
+ */
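
The commented-out block above is the generic OS-TLS accessor that the flat table replaces. As a rough usage illustration (sketch only, not taken from the changeset): the fast path is a shift plus an indexed load, while the generic path fetches the value from the per-thread OS storage slot, which on Linux ultimately goes through a pthread_getspecific-style lookup and is noticeably more expensive on hot paths such as interpreter dispatch.

    // Fast path used by this port: read $sp, shift, index the flat table.
    Thread* fast = ThreadLocalStorage::thread();

    // Generic path (kept above for reference): fetch from the OS TLS slot.
    // Thread* slow = (Thread*) os::thread_local_storage_at(ThreadLocalStorage::thread_index());
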
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_thread_linux_mips.cpp.incl"
+
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is
+// currently interrupted by SIGPROF
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr,
+  void* ucontext, bool isInJava) {
+
+  assert(Thread::current() == this, "caller must be current thread");
+  assert(this->is_Java_thread(), "must be JavaThread");
+
+  JavaThread* jt = (JavaThread *)this;
+
+  // If we have a last_Java_frame, then we should use it even if
+  // isInJava == true.  It should be more reliable than ucontext info.
+  if (jt->has_last_Java_frame()) {
+    *fr_addr = jt->pd_last_frame();
+    return true;
+  }
+
+  // At this point, we don't have a last_Java_frame, so
+  // we try to glean some information out of the ucontext
+  // if we were running Java code when SIGPROF came in.
+  if (isInJava) {
+    ucontext_t* uc = (ucontext_t*) ucontext;
+
+    intptr_t* ret_fp;
+    intptr_t* ret_sp;
+    ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc,
+      &ret_sp, &ret_fp);
+    if (addr.pc() == NULL || ret_sp == NULL ) {
+      // ucontext wasn't useful
+      return false;
+    }
+
+    frame ret_frame(ret_sp, ret_fp, addr.pc());
+    if (!ret_frame.safe_for_sender(jt)) {
+#ifdef COMPILER2
+      // C2 may use the frame pointer as a general register; see if a NULL fp helps
+      frame ret_frame2(ret_sp, NULL, addr.pc());
+      if (!ret_frame2.safe_for_sender(jt)) {
+        // nothing else to try if the frame isn't good
+        return false;
+      }
+      ret_frame = ret_frame2;
+#else
+      // nothing else to try if the frame isn't good
+      return false;
+#endif /* COMPILER2 */
+    }
+    *fr_addr = ret_frame;
+    return true;
+  }
+
+  // nothing else to try
+  return false;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+ private:
+  void pd_initialize() {
+    _anchor.clear();
+  }
+
+  frame pd_last_frame() {
+    assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+    if (_anchor.last_Java_pc() != NULL) {
+      return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
+    } else {
+      // This will pick up pc from sp
+      return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp());
+    }
+  }
+
+ public:
+  // Mutators are highly dangerous....
+  intptr_t* last_Java_fp()                       { return _anchor.last_Java_fp(); }
+  void  set_last_Java_fp(intptr_t* fp)           { _anchor.set_last_Java_fp(fp);   }
+
+  void set_base_of_stack_pointer(intptr_t* base_sp) {
+  }
+
+  static ByteSize last_Java_fp_offset()          {
+    return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
+  }
+
+  intptr_t* base_of_stack_pointer() {
+    return NULL;
+  }
+  void record_base_of_stack_pointer() {
+  }
+
+  bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
+    bool isInJava);
+
+  // These routines are only used on cpu architectures that
+  // have separate register stacks (Itanium).
+  static bool register_stack_overflow() { return false; }
+  static void enable_register_stack_guard() {}
+  static void disable_register_stack_guard() {}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
+                                                                                                                                     \
+  /******************************/                                                                                                   \
+  /* Threads (NOTE: incomplete) */                                                                                                   \
+  /******************************/                                                                                                   \
+  nonstatic_field(OSThread,                      _thread_id,                                      pid_t)                             \
+  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)                         \
+  /* This must be the last entry, and must be present */                                                                             \
+  last_entry()
+
+
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type, last_entry) \
+                                                                          \
+  /**********************/                                                \
+  /* Posix Thread IDs   */                                                \
+  /**********************/                                                \
+                                                                          \
+  declare_integer_type(pid_t)                                             \
+  declare_unsigned_integer_type(pthread_t)                                \
+                                                                          \
+  /* This must be the last entry, and must be present */                  \
+  last_entry()
+
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
+                                                                        \
+  /* This must be the last entry, and must be present */                \
+  last_entry()
+
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant, last_entry) \
+                                                                        \
+  /* This must be the last entry, and must be present */                \
+  last_entry()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_vm_version_linux_mips.cpp.incl"
--- a/hotspot/src/share/vm/asm/codeBuffer.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/asm/codeBuffer.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -307,6 +308,9 @@
     assert(rtype == relocInfo::none              ||
            rtype == relocInfo::runtime_call_type ||
            rtype == relocInfo::internal_word_type||
+#ifdef MIPS32
+           rtype == relocInfo::internal_pc_type  ||
+#endif
            rtype == relocInfo::section_word_type ||
            rtype == relocInfo::external_word_type,
            "code needs relocation information");
@@ -523,7 +527,6 @@
     assert(buf_limit % HeapWordSize == 0, "buf must be evenly sized");
   }
   // if dest == NULL, this is just the sizing pass
-
   csize_t code_end_so_far = 0;
   csize_t code_point_so_far = 0;
   for (int n = 0; n < (int)SECT_LIMIT; n++) {
--- a/hotspot/src/share/vm/asm/register.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/asm/register.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -34,8 +34,8 @@
 // and vice versa. A concrete implementation may just map the register onto 'this'.
 
 class AbstractRegisterImpl {
- protected:
-  int value() const                              { return (int)(intx)this; }
+	protected:
+		int value() const                              { return (int)(intx)this; }
 };
 
 
@@ -78,134 +78,134 @@
 #define AS_REGISTER(type,name)         ((type)name##_##type##EnumValue)
 
 #define CONSTANT_REGISTER_DECLARATION(type, name, value) \
-extern const type name;                                  \
+	extern const type name;                                  \
 enum { name##_##type##EnumValue = (value) }
 
 #define REGISTER_DECLARATION(type, name, value) \
-extern const type name;                         \
+	extern const type name;                         \
 enum { name##_##type##EnumValue = value##_##type##EnumValue }
 
 #define REGISTER_DEFINITION(type, name) \
-const type name = ((type)name##_##type##EnumValue)
+	const type name = ((type)name##_##type##EnumValue)
 
 
 
 // Debugging support
 
 inline void assert_different_registers(
-  AbstractRegister a,
-  AbstractRegister b
-) {
-  assert(
-    a != b,
-    "registers must be different"
-  );
+		AbstractRegister a,
+		AbstractRegister b
+		) {
+	assert(
+			a != b,
+			"registers must be different"
+	      );
 }
 
 
 inline void assert_different_registers(
-  AbstractRegister a,
-  AbstractRegister b,
-  AbstractRegister c
-) {
-  assert(
-    a != b && a != c
-           && b != c,
-    "registers must be different"
-  );
+		AbstractRegister a,
+		AbstractRegister b,
+		AbstractRegister c
+		) {
+	assert(
+			a != b && a != c
+			&& b != c,
+			"registers must be different"
+	      );
 }
 
 
 inline void assert_different_registers(
-  AbstractRegister a,
-  AbstractRegister b,
-  AbstractRegister c,
-  AbstractRegister d
-) {
-  assert(
-    a != b && a != c && a != d
-           && b != c && b != d
-                     && c != d,
-    "registers must be different"
-  );
+		AbstractRegister a,
+		AbstractRegister b,
+		AbstractRegister c,
+		AbstractRegister d
+		) {
+	assert(
+			a != b && a != c && a != d
+			&& b != c && b != d
+			&& c != d,
+			"registers must be different"
+	      );
 }
 
 
 inline void assert_different_registers(
-  AbstractRegister a,
-  AbstractRegister b,
-  AbstractRegister c,
-  AbstractRegister d,
-  AbstractRegister e
-) {
-  assert(
-    a != b && a != c && a != d && a != e
-           && b != c && b != d && b != e
-                     && c != d && c != e
-                               && d != e,
-    "registers must be different"
-  );
+		AbstractRegister a,
+		AbstractRegister b,
+		AbstractRegister c,
+		AbstractRegister d,
+		AbstractRegister e
+		) {
+	assert(
+			a != b && a != c && a != d && a != e
+			&& b != c && b != d && b != e
+			&& c != d && c != e
+			&& d != e,
+			"registers must be different"
+	      );
 }
 
 
 inline void assert_different_registers(
-  AbstractRegister a,
-  AbstractRegister b,
-  AbstractRegister c,
-  AbstractRegister d,
-  AbstractRegister e,
-  AbstractRegister f
-) {
-  assert(
-    a != b && a != c && a != d && a != e && a != f
-           && b != c && b != d && b != e && b != f
-                     && c != d && c != e && c != f
-                               && d != e && d != f
-                                         && e != f,
-    "registers must be different"
-  );
+		AbstractRegister a,
+		AbstractRegister b,
+		AbstractRegister c,
+		AbstractRegister d,
+		AbstractRegister e,
+		AbstractRegister f
+		) {
+	assert(
+			a != b && a != c && a != d && a != e && a != f
+			&& b != c && b != d && b != e && b != f
+			&& c != d && c != e && c != f
+			&& d != e && d != f
+			&& e != f,
+			"registers must be different"
+	      );
 }
 
 
 inline void assert_different_registers(
-  AbstractRegister a,
-  AbstractRegister b,
-  AbstractRegister c,
-  AbstractRegister d,
-  AbstractRegister e,
-  AbstractRegister f,
-  AbstractRegister g
-) {
-  assert(
-    a != b && a != c && a != d && a != e && a != f && a != g
-           && b != c && b != d && b != e && b != f && b != g
-                     && c != d && c != e && c != f && c != g
-                               && d != e && d != f && d != g
-                                         && e != f && e != g
-                                                   && f != g,
-    "registers must be different"
-  );
+		AbstractRegister a,
+		AbstractRegister b,
+		AbstractRegister c,
+		AbstractRegister d,
+		AbstractRegister e,
+		AbstractRegister f,
+		AbstractRegister g
+		) {
+	assert(
+			a != b && a != c && a != d && a != e && a != f && a != g
+			&& b != c && b != d && b != e && b != f && b != g
+			&& c != d && c != e && c != f && c != g
+			&& d != e && d != f && d != g
+			&& e != f && e != g
+			&& f != g,
+			"registers must be different"
+	      );
 }
 
 
 inline void assert_different_registers(
-  AbstractRegister a,
-  AbstractRegister b,
-  AbstractRegister c,
-  AbstractRegister d,
-  AbstractRegister e,
-  AbstractRegister f,
-  AbstractRegister g,
-  AbstractRegister h
-) {
-  assert(
-    a != b && a != c && a != d && a != e && a != f && a != g && a != h
-           && b != c && b != d && b != e && b != f && b != g && b != h
-                     && c != d && c != e && c != f && c != g && c != h
-                               && d != e && d != f && d != g && d != h
-                                         && e != f && e != g && e != h
-                                                   && f != g && f != h
-                                                             && g != h,
-    "registers must be different"
-  );
+		AbstractRegister a,
+		AbstractRegister b,
+		AbstractRegister c,
+		AbstractRegister d,
+		AbstractRegister e,
+		AbstractRegister f,
+		AbstractRegister g,
+		AbstractRegister h
+		) {
+	assert(
+			a != b && a != c && a != d && a != e && a != f && a != g && a != h
+			&& b != c && b != d && b != e && b != f && b != g && b != h
+			&& c != d && c != e && c != f && c != g && c != h
+			&& d != e && d != f && d != g && d != h
+			&& e != f && e != g && e != h
+			&& f != g && f != h
+			&& g != h,
+			"registers must be different"
+	      );
 }
--- a/hotspot/src/share/vm/c1/c1_CodeStubs.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_CodeStubs.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -339,7 +339,7 @@
     load_klass_id
   };
   enum constants {
-    patch_info_size = 3
+    patch_info_size = NOT_MIPS32(3) MIPS32_ONLY(4)
   };
  private:
   PatchID       _id;
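
For readers unfamiliar with the convention: NOT_MIPS32/MIPS32_ONLY follow the usual HotSpot per-platform macro pattern, so the constant stays 3 on other CPUs and becomes 4 when building the MIPS32 port. The macro definitions are not part of this hunk; the sketch below shows their presumed shape, modeled on the existing *_ONLY macros in globalDefinitions.hpp.

    // Presumed definitions (not shown in this changeset), mirroring the
    // existing per-CPU macros:
    #ifdef MIPS32
    #define MIPS32_ONLY(code) code
    #define NOT_MIPS32(code)
    #else
    #define MIPS32_ONLY(code)
    #define NOT_MIPS32(code) code
    #endif

    // So "patch_info_size = NOT_MIPS32(3) MIPS32_ONLY(4)" expands to
    // "patch_info_size = 3" everywhere except MIPS32, where it is "= 4".
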
--- a/hotspot/src/share/vm/c1/c1_Compilation.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_Compilation.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -27,43 +27,43 @@
 
 
 typedef enum {
-  _t_compile,
-  _t_setup,
-  _t_optimizeIR,
-  _t_buildIR,
-  _t_emit_lir,
-  _t_linearScan,
-  _t_lirGeneration,
-  _t_lir_schedule,
-  _t_codeemit,
-  _t_codeinstall,
-  max_phase_timers
+	_t_compile,
+	_t_setup,
+	_t_optimizeIR,
+	_t_buildIR,
+	_t_emit_lir,
+	_t_linearScan,
+	_t_lirGeneration,
+	_t_lir_schedule,
+	_t_codeemit,
+	_t_codeinstall,
+	max_phase_timers
 } TimerName;
 
 static const char * timer_name[] = {
-  "compile",
-  "setup",
-  "optimizeIR",
-  "buildIR",
-  "emit_lir",
-  "linearScan",
-  "lirGeneration",
-  "lir_schedule",
-  "codeemit",
-  "codeinstall"
+	"compile",
+	"setup",
+	"optimizeIR",
+	"buildIR",
+	"emit_lir",
+	"linearScan",
+	"lirGeneration",
+	"lir_schedule",
+	"codeemit",
+	"codeinstall"
 };
 
 static elapsedTimer timers[max_phase_timers];
 static int totalInstructionNodes = 0;
 
 class PhaseTraceTime: public TraceTime {
- private:
-  JavaThread* _thread;
+	private:
+		JavaThread* _thread;
 
- public:
-  PhaseTraceTime(TimerName timer):
-    TraceTime("", &timers[timer], CITime || CITimeEach, Verbose) {
-  }
+	public:
+		PhaseTraceTime(TimerName timer):
+			TraceTime("", &timers[timer], CITime || CITimeEach, Verbose) {
+			}
 };
 
 Arena* Compilation::_arena = NULL;
@@ -75,452 +75,467 @@
 #ifndef PRODUCT
 
 void Compilation::maybe_print_current_instruction() {
-  if (_current_instruction != NULL && _last_instruction_printed != _current_instruction) {
-    _last_instruction_printed = _current_instruction;
-    _current_instruction->print_line();
-  }
+	if (_current_instruction != NULL && _last_instruction_printed != _current_instruction) {
+		_last_instruction_printed = _current_instruction;
+		_current_instruction->print_line();
+	}
 }
 #endif // PRODUCT
 
 
 DebugInformationRecorder* Compilation::debug_info_recorder() const {
-  return _env->debug_info();
+	return _env->debug_info();
 }
 
 
 Dependencies* Compilation::dependency_recorder() const {
-  return _env->dependencies();
+	return _env->dependencies();
 }
 
 
 void Compilation::initialize() {
-  // Use an oop recorder bound to the CI environment.
-  // (The default oop recorder is ignorant of the CI.)
-  OopRecorder* ooprec = new OopRecorder(_env->arena());
-  _env->set_oop_recorder(ooprec);
-  _env->set_debug_info(new DebugInformationRecorder(ooprec));
-  debug_info_recorder()->set_oopmaps(new OopMapSet());
-  _env->set_dependencies(new Dependencies(_env));
+	// Use an oop recorder bound to the CI environment.
+	// (The default oop recorder is ignorant of the CI.)
+	OopRecorder* ooprec = new OopRecorder(_env->arena());
+	_env->set_oop_recorder(ooprec);
+	_env->set_debug_info(new DebugInformationRecorder(ooprec));
+	debug_info_recorder()->set_oopmaps(new OopMapSet());
+	_env->set_dependencies(new Dependencies(_env));
 }
 
 
 void Compilation::build_hir() {
-  CHECK_BAILOUT();
+	CHECK_BAILOUT();
 
-  // setup ir
-  _hir = new IR(this, method(), osr_bci());
-  if (!_hir->is_valid()) {
-    bailout("invalid parsing");
-    return;
-  }
+	// setup ir
+	_hir = new IR(this, method(), osr_bci());
+	if (!_hir->is_valid()) {
+		bailout("invalid parsing");
+		return;
+	}
 
 #ifndef PRODUCT
-  if (PrintCFGToFile) {
-    CFGPrinter::print_cfg(_hir, "After Generation of HIR", true, false);
-  }
+	if (PrintCFGToFile) {
+		CFGPrinter::print_cfg(_hir, "After Generation of HIR", true, false);
+	}
 #endif
 
 #ifndef PRODUCT
-  if (PrintCFG || PrintCFG0) { tty->print_cr("CFG after parsing"); _hir->print(true); }
-  if (PrintIR  || PrintIR0 ) { tty->print_cr("IR after parsing"); _hir->print(false); }
+	if (PrintCFG || PrintCFG0) { tty->print_cr("CFG after parsing"); _hir->print(true); }
+	if (PrintIR  || PrintIR0 ) { tty->print_cr("IR after parsing"); _hir->print(false); }
 #endif
 
-  _hir->verify();
+	_hir->verify();
 
-  if (UseC1Optimizations) {
-    NEEDS_CLEANUP
-    // optimization
-    PhaseTraceTime timeit(_t_optimizeIR);
+	if (UseC1Optimizations) {
+		NEEDS_CLEANUP
+			// optimization
+			PhaseTraceTime timeit(_t_optimizeIR);
 
-    _hir->optimize();
-  }
+		_hir->optimize();
+	}
 
-  _hir->verify();
+	_hir->verify();
 
-  _hir->split_critical_edges();
+	_hir->split_critical_edges();
 
 #ifndef PRODUCT
-  if (PrintCFG || PrintCFG1) { tty->print_cr("CFG after optimizations"); _hir->print(true); }
-  if (PrintIR  || PrintIR1 ) { tty->print_cr("IR after optimizations"); _hir->print(false); }
+	if (PrintCFG || PrintCFG1) { tty->print_cr("CFG after optimizations"); _hir->print(true); }
+	if (PrintIR  || PrintIR1 ) { tty->print_cr("IR after optimizations"); _hir->print(false); }
 #endif
 
-  _hir->verify();
+	_hir->verify();
 
-  // compute block ordering for code generation
-  // the control flow must not be changed from here on
-  _hir->compute_code();
+	// compute block ordering for code generation
+	// the control flow must not be changed from here on
+	_hir->compute_code();
 
-  if (UseGlobalValueNumbering) {
-    ResourceMark rm;
-    int instructions = Instruction::number_of_instructions();
-    GlobalValueNumbering gvn(_hir);
-    assert(instructions == Instruction::number_of_instructions(),
-           "shouldn't have created an instructions");
-  }
+	if (UseGlobalValueNumbering) {
+		ResourceMark rm;
+		int instructions = Instruction::number_of_instructions();
+		GlobalValueNumbering gvn(_hir);
+		assert(instructions == Instruction::number_of_instructions(),
+				"shouldn't have created an instructions");
+	}
 
-  // compute use counts after global value numbering
-  _hir->compute_use_counts();
+	// compute use counts after global value numbering
+	_hir->compute_use_counts();
 
 #ifndef PRODUCT
-  if (PrintCFG || PrintCFG2) { tty->print_cr("CFG before code generation"); _hir->code()->print(true); }
-  if (PrintIR  || PrintIR2 ) { tty->print_cr("IR before code generation"); _hir->code()->print(false, true); }
+	if (PrintCFG || PrintCFG2) { tty->print_cr("CFG before code generation"); _hir->code()->print(true); }
+	if (PrintIR  || PrintIR2 ) { tty->print_cr("IR before code generation"); _hir->code()->print(false, true); }
 #endif
 
-  _hir->verify();
+	_hir->verify();
 }
 
 
 void Compilation::emit_lir() {
-  CHECK_BAILOUT();
+	CHECK_BAILOUT();
 
-  LIRGenerator gen(this, method());
-  {
-    PhaseTraceTime timeit(_t_lirGeneration);
-    hir()->iterate_linear_scan_order(&gen);
-  }
+	LIRGenerator gen(this, method());
+	{
+		PhaseTraceTime timeit(_t_lirGeneration);
+		hir()->iterate_linear_scan_order(&gen);
+	}
 
-  CHECK_BAILOUT();
+	CHECK_BAILOUT();
 
-  {
-    PhaseTraceTime timeit(_t_linearScan);
+	{
+		PhaseTraceTime timeit(_t_linearScan);
 
-    LinearScan* allocator = new LinearScan(hir(), &gen, frame_map());
-    set_allocator(allocator);
-    // Assign physical registers to LIR operands using a linear scan algorithm.
-    allocator->do_linear_scan();
-    CHECK_BAILOUT();
+		LinearScan* allocator = new LinearScan(hir(), &gen, frame_map());
+		set_allocator(allocator);
+		// Assign physical registers to LIR operands using a linear scan algorithm.
+		allocator->do_linear_scan();
+		CHECK_BAILOUT();
 
-    _max_spills = allocator->max_spills();
-  }
+		_max_spills = allocator->max_spills();
+	}
 
-  if (BailoutAfterLIR) {
-    if (PrintLIR && !bailed_out()) {
-      print_LIR(hir()->code());
-    }
-    bailout("Bailing out because of -XX:+BailoutAfterLIR");
-  }
+	if (BailoutAfterLIR) {
+		if (PrintLIR && !bailed_out()) {
+			print_LIR(hir()->code());
+		}
+		bailout("Bailing out because of -XX:+BailoutAfterLIR");
+	}
 }
 
 
 void Compilation::emit_code_epilog(LIR_Assembler* assembler) {
-  CHECK_BAILOUT();
+	CHECK_BAILOUT();
 
-  // generate code or slow cases
-  assembler->emit_slow_case_stubs();
-  CHECK_BAILOUT();
+	// generate code or slow cases
+	assembler->emit_slow_case_stubs();
+	CHECK_BAILOUT();
 
-  // generate exception adapters
-  assembler->emit_exception_entries(exception_info_list());
-  CHECK_BAILOUT();
+	// generate exception adapters
+	assembler->emit_exception_entries(exception_info_list());
+	CHECK_BAILOUT();
 
-  // generate code for exception handler
-  assembler->emit_exception_handler();
-  CHECK_BAILOUT();
-  assembler->emit_deopt_handler();
-  CHECK_BAILOUT();
+	// generate code for exception handler
+	assembler->emit_exception_handler();
+	CHECK_BAILOUT();
+	assembler->emit_deopt_handler();
+	CHECK_BAILOUT();
 
-  // done
-  masm()->flush();
+	// done
+	masm()->flush();
 }
 
 
 int Compilation::emit_code_body() {
-  // emit code
-  Runtime1::setup_code_buffer(code(), allocator()->num_calls());
-  code()->initialize_oop_recorder(env()->oop_recorder());
+	// emit code
+	Runtime1::setup_code_buffer(code(), allocator()->num_calls());
+	code()->initialize_oop_recorder(env()->oop_recorder());
 
-  _masm = new C1_MacroAssembler(code());
-  _masm->set_oop_recorder(env()->oop_recorder());
+	_masm = new C1_MacroAssembler(code());
+	_masm->set_oop_recorder(env()->oop_recorder());
 
-  LIR_Assembler lir_asm(this);
+	LIR_Assembler lir_asm(this);
 
-  lir_asm.emit_code(hir()->code());
-  CHECK_BAILOUT_(0);
+	lir_asm.emit_code(hir()->code());
+	CHECK_BAILOUT_(0);
 
-  emit_code_epilog(&lir_asm);
-  CHECK_BAILOUT_(0);
+	emit_code_epilog(&lir_asm);
+	CHECK_BAILOUT_(0);
 
-  generate_exception_handler_table();
+	generate_exception_handler_table();
 
 #ifndef PRODUCT
-  if (PrintExceptionHandlers && Verbose) {
-    exception_handler_table()->print();
-  }
+	if (PrintExceptionHandlers && Verbose) {
+		exception_handler_table()->print();
+	}
 #endif /* PRODUCT */
 
-  return frame_map()->framesize();
+	return frame_map()->framesize();
 }
 
 
 int Compilation::compile_java_method() {
-  assert(!method()->is_native(), "should not reach here");
+	assert(!method()->is_native(), "should not reach here");
 
-  if (BailoutOnExceptionHandlers) {
-    if (method()->has_exception_handlers()) {
-      bailout("linear scan can't handle exception handlers");
-    }
-  }
+	if (BailoutOnExceptionHandlers) {
+		if (method()->has_exception_handlers()) {
+			bailout("linear scan can't handle exception handlers");
+		}
+	}
 
-  CHECK_BAILOUT_(no_frame_size);
+	CHECK_BAILOUT_(no_frame_size);
 
-  {
-    PhaseTraceTime timeit(_t_buildIR);
-  build_hir();
-  }
-  if (BailoutAfterHIR) {
-    BAILOUT_("Bailing out because of -XX:+BailoutAfterHIR", no_frame_size);
-  }
+	{
+		PhaseTraceTime timeit(_t_buildIR);
+		build_hir();
+	}
+	if (BailoutAfterHIR) {
+		BAILOUT_("Bailing out because of -XX:+BailoutAfterHIR", no_frame_size);
+	}
 
 
-  {
-    PhaseTraceTime timeit(_t_emit_lir);
+	{
+		PhaseTraceTime timeit(_t_emit_lir);
+
+		_frame_map = new FrameMap(method(), hir()->number_of_locks(), MAX2(4, hir()->max_stack()));
+
 
-    _frame_map = new FrameMap(method(), hir()->number_of_locks(), MAX2(4, hir()->max_stack()));
-    emit_lir();
-  }
-  CHECK_BAILOUT_(no_frame_size);
+		emit_lir();
+	}
+	CHECK_BAILOUT_(no_frame_size);
 
-  {
-    PhaseTraceTime timeit(_t_codeemit);
-    return emit_code_body();
-  }
+	{
+		PhaseTraceTime timeit(_t_codeemit);
+
+
+		return emit_code_body();
+
+	}
 }
 
 void Compilation::install_code(int frame_size) {
-  // frame_size is in 32-bit words so adjust it intptr_t words
-  assert(frame_size == frame_map()->framesize(), "must match");
-  assert(in_bytes(frame_map()->framesize_in_bytes()) % sizeof(intptr_t) == 0, "must be at least pointer aligned");
-  _env->register_method(
-    method(),
-    osr_bci(),
-    &_offsets,
-    in_bytes(_frame_map->sp_offset_for_orig_pc()),
-    code(),
-    in_bytes(frame_map()->framesize_in_bytes()) / sizeof(intptr_t),
-    debug_info_recorder()->_oopmaps,
-    exception_handler_table(),
-    implicit_exception_table(),
-    compiler(),
-    _env->comp_level(),
-    needs_debug_information(),
-    has_unsafe_access()
-  );
+	// frame_size is in 32-bit words, so adjust it to intptr_t words
+
+	assert(frame_size == frame_map()->framesize(), "must match");
+	assert(in_bytes(frame_map()->framesize_in_bytes()) % sizeof(intptr_t) == 0, "must be at least pointer aligned");
+	_env->register_method(
+			method(),
+			osr_bci(),
+			&_offsets,
+			in_bytes(_frame_map->sp_offset_for_orig_pc()),
+			code(),
+			in_bytes(frame_map()->framesize_in_bytes()) / sizeof(intptr_t),
+			debug_info_recorder()->_oopmaps,
+			exception_handler_table(),
+			implicit_exception_table(),
+			compiler(),
+			_env->comp_level(),
+			needs_debug_information(),
+			has_unsafe_access()
+			);
+
 }
 
 
 void Compilation::compile_method() {
-  // setup compilation
-  initialize();
+	// setup compilation
+	initialize();
 
-  if (!method()->can_be_compiled()) {
-    // Prevent race condition 6328518.
-    // This can happen if the method is obsolete or breakpointed.
-    bailout("Bailing out because method is not compilable");
-    return;
-  }
+	if (!method()->can_be_compiled()) {
+		// Prevent race condition 6328518.
+		// This can happen if the method is obsolete or breakpointed.
+		bailout("Bailing out because method is not compilable");
+		return;
+	}
 
-  if (JvmtiExport::can_hotswap_or_post_breakpoint()) {
-    // We can assert evol_method because method->can_be_compiled is true.
-    dependency_recorder()->assert_evol_method(method());
-  }
+	if (JvmtiExport::can_hotswap_or_post_breakpoint()) {
+		// We can assert evol_method because method->can_be_compiled is true.
+		dependency_recorder()->assert_evol_method(method());
+	}
 
-  if (method()->break_at_execute()) {
-    BREAKPOINT;
-  }
+	if (method()->break_at_execute()) {
+		BREAKPOINT;
+	}
 
 #ifndef PRODUCT
-  if (PrintCFGToFile) {
-    CFGPrinter::print_compilation(this);
-  }
+	if (PrintCFGToFile) {
+		CFGPrinter::print_compilation(this);
+	}
 #endif
 
-  // compile method
-  int frame_size = compile_java_method();
-
-  // bailout if method couldn't be compiled
-  // Note: make sure we mark the method as not compilable!
-  CHECK_BAILOUT();
+	// compile method
+	int frame_size = compile_java_method();
 
-  if (InstallMethods) {
-    // install code
-    PhaseTraceTime timeit(_t_codeinstall);
-    install_code(frame_size);
-  }
-  totalInstructionNodes += Instruction::number_of_instructions();
+	// bailout if method couldn't be compiled
+	// Note: make sure we mark the method as not compilable!
+	CHECK_BAILOUT();
+
+	if (InstallMethods) {
+		// install code
+		PhaseTraceTime timeit(_t_codeinstall);
+		install_code(frame_size);
+	}
+	totalInstructionNodes += Instruction::number_of_instructions();
 }
 
 
 void Compilation::generate_exception_handler_table() {
-  // Generate an ExceptionHandlerTable from the exception handler
-  // information accumulated during the compilation.
-  ExceptionInfoList* info_list = exception_info_list();
+	// Generate an ExceptionHandlerTable from the exception handler
+	// information accumulated during the compilation.
+	ExceptionInfoList* info_list = exception_info_list();
 
-  if (info_list->length() == 0) {
-    return;
-  }
+	if (info_list->length() == 0) {
+		return;
+	}
 
-  // allocate some arrays for use by the collection code.
-  const int num_handlers = 5;
-  GrowableArray<intptr_t>* bcis = new GrowableArray<intptr_t>(num_handlers);
-  GrowableArray<intptr_t>* scope_depths = new GrowableArray<intptr_t>(num_handlers);
-  GrowableArray<intptr_t>* pcos = new GrowableArray<intptr_t>(num_handlers);
+	// allocate some arrays for use by the collection code.
+	const int num_handlers = 5;
+	GrowableArray<intptr_t>* bcis = new GrowableArray<intptr_t>(num_handlers);
+	GrowableArray<intptr_t>* scope_depths = new GrowableArray<intptr_t>(num_handlers);
+	GrowableArray<intptr_t>* pcos = new GrowableArray<intptr_t>(num_handlers);
 
-  for (int i = 0; i < info_list->length(); i++) {
-    ExceptionInfo* info = info_list->at(i);
-    XHandlers* handlers = info->exception_handlers();
+	for (int i = 0; i < info_list->length(); i++) {
+		ExceptionInfo* info = info_list->at(i);
+		XHandlers* handlers = info->exception_handlers();
 
-    // empty the arrays
-    bcis->trunc_to(0);
-    scope_depths->trunc_to(0);
-    pcos->trunc_to(0);
+		// empty the arrays
+		bcis->trunc_to(0);
+		scope_depths->trunc_to(0);
+		pcos->trunc_to(0);
 
-    for (int i = 0; i < handlers->length(); i++) {
-      XHandler* handler = handlers->handler_at(i);
-      assert(handler->entry_pco() != -1, "must have been generated");
+		for (int i = 0; i < handlers->length(); i++) {
+			XHandler* handler = handlers->handler_at(i);
+			assert(handler->entry_pco() != -1, "must have been generated");
 
-      int e = bcis->find(handler->handler_bci());
-      if (e >= 0 && scope_depths->at(e) == handler->scope_count()) {
-        // two different handlers are declared to dispatch to the same
-        // catch bci.  During parsing we created edges for each
-        // handler but we really only need one.  The exception handler
-        // table will also get unhappy if we try to declare both since
-        // it's nonsensical.  Just skip this handler.
-        continue;
-      }
+			int e = bcis->find(handler->handler_bci());
+			if (e >= 0 && scope_depths->at(e) == handler->scope_count()) {
+				// two different handlers are declared to dispatch to the same
+				// catch bci.  During parsing we created edges for each
+				// handler but we really only need one.  The exception handler
+				// table will also get unhappy if we try to declare both since
+				// it's nonsensical.  Just skip this handler.
+				continue;
+			}
 
-      bcis->append(handler->handler_bci());
-      if (handler->handler_bci() == -1) {
-        // insert a wildcard handler at scope depth 0 so that the
-        // exception lookup logic with find it.
-        scope_depths->append(0);
-      } else {
-        scope_depths->append(handler->scope_count());
-    }
-      pcos->append(handler->entry_pco());
+			bcis->append(handler->handler_bci());
+			if (handler->handler_bci() == -1) {
+				// insert a wildcard handler at scope depth 0 so that the
+				// exception lookup logic will find it.
+				scope_depths->append(0);
+			} else {
+				scope_depths->append(handler->scope_count());
+			}
+			pcos->append(handler->entry_pco());
 
-      // stop processing once we hit a catch any
-      if (handler->is_catch_all()) {
-        assert(i == handlers->length() - 1, "catch all must be last handler");
-  }
-    }
-    exception_handler_table()->add_subtable(info->pco(), bcis, scope_depths, pcos);
-  }
+			// stop processing once we hit a catch any
+			if (handler->is_catch_all()) {
+				assert(i == handlers->length() - 1, "catch all must be last handler");
+			}
+		}
+		exception_handler_table()->add_subtable(info->pco(), bcis, scope_depths, pcos);
+	}
 }
 
 
-Compilation::Compilation(AbstractCompiler* compiler, ciEnv* env, ciMethod* method, int osr_bci)
-: _compiler(compiler)
-, _env(env)
-, _method(method)
-, _osr_bci(osr_bci)
-, _hir(NULL)
-, _max_spills(-1)
-, _frame_map(NULL)
-, _masm(NULL)
-, _has_exception_handlers(false)
-, _has_fpu_code(true)   // pessimistic assumption
-, _has_unsafe_access(false)
-, _bailout_msg(NULL)
-, _exception_info_list(NULL)
-, _allocator(NULL)
-, _code(Runtime1::get_buffer_blob()->instructions_begin(),
-        Runtime1::get_buffer_blob()->instructions_size())
-, _current_instruction(NULL)
+	Compilation::Compilation(AbstractCompiler* compiler, ciEnv* env, ciMethod* method, int osr_bci)
+	: _compiler(compiler)
+	, _env(env)
+	, _method(method)
+	, _osr_bci(osr_bci)
+	, _hir(NULL)
+	, _max_spills(-1)
+	, _frame_map(NULL)
+	, _masm(NULL)
+	, _has_exception_handlers(false)
+	, _has_fpu_code(true)   // pessimistic assumption
+	, _has_unsafe_access(false)
+	, _bailout_msg(NULL)
+	, _exception_info_list(NULL)
+	, _allocator(NULL)
+	  , _code(Runtime1::get_buffer_blob()->instructions_begin(),
+			  Runtime1::get_buffer_blob()->instructions_size())
+			, _current_instruction(NULL)
 #ifndef PRODUCT
-, _last_instruction_printed(NULL)
+			, _last_instruction_printed(NULL)
 #endif // PRODUCT
 {
-  PhaseTraceTime timeit(_t_compile);
+	PhaseTraceTime timeit(_t_compile);
 
-  assert(_arena == NULL, "shouldn't only one instance of Compilation in existence at a time");
-  _arena = Thread::current()->resource_area();
-  _compilation = this;
-  _needs_debug_information = JvmtiExport::can_examine_or_deopt_anywhere() ||
-                               JavaMonitorsInStackTrace || AlwaysEmitDebugInfo || DeoptimizeALot;
-  _exception_info_list = new ExceptionInfoList();
-  _implicit_exception_table.set_size(0);
-  compile_method();
+	assert(_arena == NULL, "should be only one instance of Compilation in existence at a time");
+	_arena = Thread::current()->resource_area();
+	_compilation = this;
+	_needs_debug_information = JvmtiExport::can_examine_or_deopt_anywhere() ||
+		JavaMonitorsInStackTrace || AlwaysEmitDebugInfo || DeoptimizeALot;
+	_exception_info_list = new ExceptionInfoList();
+	_implicit_exception_table.set_size(0);
+	compile_method();
 }
 
 Compilation::~Compilation() {
-  _arena = NULL;
-  _compilation = NULL;
+	_arena = NULL;
+	_compilation = NULL;
 }
 
 
 void Compilation::add_exception_handlers_for_pco(int pco, XHandlers* exception_handlers) {
 #ifndef PRODUCT
-  if (PrintExceptionHandlers && Verbose) {
-    tty->print_cr("  added exception scope for pco %d", pco);
-  }
+	if (PrintExceptionHandlers && Verbose) {
+		tty->print_cr("  added exception scope for pco %d", pco);
+	}
 #endif
-  // Note: we do not have program counters for these exception handlers yet
-  exception_info_list()->push(new ExceptionInfo(pco, exception_handlers));
+	// Note: we do not have program counters for these exception handlers yet
+	exception_info_list()->push(new ExceptionInfo(pco, exception_handlers));
 }
 
 
 void Compilation::notice_inlined_method(ciMethod* method) {
-  _env->notice_inlined_method(method);
+	_env->notice_inlined_method(method);
 }
 
 
 void Compilation::bailout(const char* msg) {
-  assert(msg != NULL, "bailout message must exist");
-  if (!bailed_out()) {
-    // keep first bailout message
-    if (PrintBailouts) tty->print_cr("compilation bailout: %s", msg);
-    _bailout_msg = msg;
-  }
+	assert(msg != NULL, "bailout message must exist");
+	if (!bailed_out()) {
+		// keep first bailout message
+		if (PrintBailouts) tty->print_cr("compilation bailout: %s", msg);
+		_bailout_msg = msg;
+	}
 }
 
 
 void Compilation::print_timers() {
-  // tty->print_cr("    Native methods         : %6.3f s, Average : %2.3f", CompileBroker::_t_native_compilation.seconds(), CompileBroker::_t_native_compilation.seconds() / CompileBroker::_total_native_compile_count);
-  float total = timers[_t_setup].seconds() + timers[_t_buildIR].seconds() + timers[_t_emit_lir].seconds() + timers[_t_lir_schedule].seconds() + timers[_t_codeemit].seconds() + timers[_t_codeinstall].seconds();
+	// tty->print_cr("    Native methods         : %6.3f s, Average : %2.3f", CompileBroker::_t_native_compilation.seconds(), CompileBroker::_t_native_compilation.seconds() / CompileBroker::_total_native_compile_count);
+	float total = timers[_t_setup].seconds() + timers[_t_buildIR].seconds() + timers[_t_emit_lir].seconds() + timers[_t_lir_schedule].seconds() + timers[_t_codeemit].seconds() + timers[_t_codeinstall].seconds();
 
 
-  tty->print_cr("    Detailed C1 Timings");
-  tty->print_cr("       Setup time:        %6.3f s (%4.1f%%)",    timers[_t_setup].seconds(),           (timers[_t_setup].seconds() / total) * 100.0);
-  tty->print_cr("       Build IR:          %6.3f s (%4.1f%%)",    timers[_t_buildIR].seconds(),         (timers[_t_buildIR].seconds() / total) * 100.0);
-  tty->print_cr("         Optimize:           %6.3f s (%4.1f%%)", timers[_t_optimizeIR].seconds(),      (timers[_t_optimizeIR].seconds() / total) * 100.0);
-  tty->print_cr("       Emit LIR:          %6.3f s (%4.1f%%)",    timers[_t_emit_lir].seconds(),        (timers[_t_emit_lir].seconds() / total) * 100.0);
-  tty->print_cr("         LIR Gen:          %6.3f s (%4.1f%%)",   timers[_t_lirGeneration].seconds(), (timers[_t_lirGeneration].seconds() / total) * 100.0);
-  tty->print_cr("         Linear Scan:      %6.3f s (%4.1f%%)",   timers[_t_linearScan].seconds(),    (timers[_t_linearScan].seconds() / total) * 100.0);
-  NOT_PRODUCT(LinearScan::print_timers(timers[_t_linearScan].seconds()));
-  tty->print_cr("       LIR Schedule:      %6.3f s (%4.1f%%)",    timers[_t_lir_schedule].seconds(),  (timers[_t_lir_schedule].seconds() / total) * 100.0);
-  tty->print_cr("       Code Emission:     %6.3f s (%4.1f%%)",    timers[_t_codeemit].seconds(),        (timers[_t_codeemit].seconds() / total) * 100.0);
-  tty->print_cr("       Code Installation: %6.3f s (%4.1f%%)",    timers[_t_codeinstall].seconds(),     (timers[_t_codeinstall].seconds() / total) * 100.0);
-  tty->print_cr("       Instruction Nodes: %6d nodes",    totalInstructionNodes);
+	tty->print_cr("    Detailed C1 Timings");
+	tty->print_cr("       Setup time:        %6.3f s (%4.1f%%)",    timers[_t_setup].seconds(),           (timers[_t_setup].seconds() / total) * 100.0);
+	tty->print_cr("       Build IR:          %6.3f s (%4.1f%%)",    timers[_t_buildIR].seconds(),         (timers[_t_buildIR].seconds() / total) * 100.0);
+	tty->print_cr("         Optimize:           %6.3f s (%4.1f%%)", timers[_t_optimizeIR].seconds(),      (timers[_t_optimizeIR].seconds() / total) * 100.0);
+	tty->print_cr("       Emit LIR:          %6.3f s (%4.1f%%)",    timers[_t_emit_lir].seconds(),        (timers[_t_emit_lir].seconds() / total) * 100.0);
+	tty->print_cr("         LIR Gen:          %6.3f s (%4.1f%%)",   timers[_t_lirGeneration].seconds(), (timers[_t_lirGeneration].seconds() / total) * 100.0);
+	tty->print_cr("         Linear Scan:      %6.3f s (%4.1f%%)",   timers[_t_linearScan].seconds(),    (timers[_t_linearScan].seconds() / total) * 100.0);
+	NOT_PRODUCT(LinearScan::print_timers(timers[_t_linearScan].seconds()));
+	tty->print_cr("       LIR Schedule:      %6.3f s (%4.1f%%)",    timers[_t_lir_schedule].seconds(),  (timers[_t_lir_schedule].seconds() / total) * 100.0);
+	tty->print_cr("       Code Emission:     %6.3f s (%4.1f%%)",    timers[_t_codeemit].seconds(),        (timers[_t_codeemit].seconds() / total) * 100.0);
+	tty->print_cr("       Code Installation: %6.3f s (%4.1f%%)",    timers[_t_codeinstall].seconds(),     (timers[_t_codeinstall].seconds() / total) * 100.0);
+	tty->print_cr("       Instruction Nodes: %6d nodes",    totalInstructionNodes);
 
-  NOT_PRODUCT(LinearScan::print_statistics());
+	NOT_PRODUCT(LinearScan::print_statistics());
 }
 
 
 #ifndef PRODUCT
 void Compilation::compile_only_this_method() {
-  ResourceMark rm;
-  fileStream stream(fopen("c1_compile_only", "wt"));
-  stream.print_cr("# c1 compile only directives");
-  compile_only_this_scope(&stream, hir()->top_scope());
+	ResourceMark rm;
+	fileStream stream(fopen("c1_compile_only", "wt"));
+	stream.print_cr("# c1 compile only directives");
+	compile_only_this_scope(&stream, hir()->top_scope());
 }
 
 
 void Compilation::compile_only_this_scope(outputStream* st, IRScope* scope) {
-  st->print("CompileOnly=");
-  scope->method()->holder()->name()->print_symbol_on(st);
-  st->print(".");
-  scope->method()->name()->print_symbol_on(st);
-  st->cr();
+	st->print("CompileOnly=");
+	scope->method()->holder()->name()->print_symbol_on(st);
+	st->print(".");
+	scope->method()->name()->print_symbol_on(st);
+	st->cr();
 }
 
 
 void Compilation::exclude_this_method() {
-  fileStream stream(fopen(".hotspot_compiler", "at"));
-  stream.print("exclude ");
-  method()->holder()->name()->print_symbol_on(&stream);
-  stream.print(" ");
-  method()->name()->print_symbol_on(&stream);
-  stream.cr();
-  stream.cr();
+	fileStream stream(fopen(".hotspot_compiler", "at"));
+	stream.print("exclude ");
+	method()->holder()->name()->print_symbol_on(&stream);
+	stream.print(" ");
+	method()->name()->print_symbol_on(&stream);
+	stream.cr();
+	stream.cr();
 }
 #endif
--- a/hotspot/src/share/vm/c1/c1_FrameMap.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_FrameMap.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -122,8 +122,8 @@
 
  protected:
 #ifndef PRODUCT
-  static void cpu_range_check (int rnr)          { assert(0 <= rnr && rnr < nof_cpu_regs, "cpu register number is too big"); }
-  static void fpu_range_check (int rnr)          { assert(0 <= rnr && rnr < nof_fpu_regs, "fpu register number is too big"); }
+  static void cpu_range_check (int rnr)          { /*assert(0 <= rnr && rnr < nof_cpu_regs, "cpu register number is too big");*/ }
+  static void fpu_range_check (int rnr)          { /*assert(0 <= rnr && rnr < nof_fpu_regs, "fpu register number is too big");*/ }
 #endif
 
 
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 2000-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -63,6 +64,28 @@
 
 #endif
 
+#ifdef MIPS32
+
+FloatRegister LIR_OprDesc::as_float_reg() const {
+  return FrameMap::nr2floatreg(fpu_regnr());
+}
+
+
+FloatRegister LIR_OprDesc::as_double_reg() const {
+  return FrameMap::nr2floatreg(fpu_regnrHi());
+}
+
+FloatRegister LIR_OprDesc::as_fpu_lo() const {
+  return FrameMap::nr2floatreg(fpu_regnrLo());
+}
+
+FloatRegister LIR_OprDesc::as_fpu_hi() const {
+  return FrameMap::nr2floatreg(fpu_regnrHi());
+}
+
+#endif
+
+
 LIR_Opr LIR_OprFact::illegalOpr = LIR_OprFact::illegal();
 
 LIR_Opr LIR_OprFact::value_type(ValueType* type) {
@@ -258,6 +281,7 @@
 #endif
 }
 
+#ifndef MIPS32
 
 LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block)
   : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
@@ -290,6 +314,44 @@
 {
 }
 
+#else
+LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block)
+  : LIR_Op2(lir_branch, left, right, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
+  , _cond(cond)
+  , _type(type)
+  , _label(block->label())
+  , _block(block)
+  , _ublock(NULL)
+  , _stub(NULL)
+{
+}
+
+LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, CodeStub* stub)
+  : LIR_Op2(lir_branch, left, right, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
+  , _cond(cond)
+  , _type(type)
+  , _label(stub->entry())
+  , _block(NULL)
+  , _ublock(NULL)
+  , _stub(stub)
+{
+}
+
+LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* ublock)
+  : LIR_Op2(lir_branch, left, right, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
+  , _cond(cond)
+  , _type(type)
+  , _label(block->label())
+  , _block(block)
+  , _ublock(ublock)
+  , _stub(NULL)
+{
+}
+
+#endif
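
A note on the design, for readers coming from the x86 code: MIPS has no condition-code register, so the MIPS32 flavour of LIR_OpBranch is built on LIR_Op2 and carries both compare operands, letting the back end emit a compare-and-branch directly instead of a separate lir_cmp followed by a flag-based branch. The fragment below is an illustration only; lir_list, left, right and target are hypothetical names, and the MIPS32 port presumably also adds matching LIR_List::branch overloads, which are not part of this hunk.

    // Emitting "if (left == right) goto target", roughly:
    #ifndef MIPS32
      lir_list->cmp(lir_cond_equal, left, right);        // compare sets the condition
      lir_list->branch(lir_cond_equal, T_INT, target);   // branch consumes it
    #else
      lir_list->append(new LIR_OpBranch(lir_cond_equal, left, right, T_INT, target));
    #endif
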
+
 void LIR_OpBranch::change_block(BlockBegin* b) {
   assert(_block != NULL, "must have old block");
   assert(_block->label() == label(), "must be equal");
@@ -511,10 +573,14 @@
 // LIR_OpBranch;
     case lir_branch:                   // may have info, input and result register always invalid
     case lir_cond_float_branch:        // may have info, input and result register always invalid
-    {
+    { 
       assert(op->as_OpBranch() != NULL, "must be");
       LIR_OpBranch* opBranch = (LIR_OpBranch*)op;
-
+#ifdef MIPS32
+      if (opBranch->_opr1->is_valid())         do_input(opBranch->_opr1);
+      if (opBranch->_opr2->is_valid())         do_input(opBranch->_opr2);
+      if (opBranch->_tmp->is_valid())          do_temp(opBranch->_tmp);
+#endif
       if (opBranch->_info != NULL)     do_info(opBranch->_info);
       assert(opBranch->_result->is_illegal(), "not used");
       if (opBranch->_stub != NULL)     opBranch->stub()->visit(this);
@@ -522,7 +588,6 @@
       break;
     }
 
-
 // LIR_OpAllocObj
     case lir_alloc_object:
     {
@@ -535,6 +600,10 @@
       if (opAllocObj->_tmp2->is_valid())         do_temp(opAllocObj->_tmp2);
       if (opAllocObj->_tmp3->is_valid())         do_temp(opAllocObj->_tmp3);
       if (opAllocObj->_tmp4->is_valid())         do_temp(opAllocObj->_tmp4);
+#ifdef MIPS32
+      if (opAllocObj->_tmp5->is_valid())         do_temp(opAllocObj->_tmp5);
+      if (opAllocObj->_tmp6->is_valid())         do_temp(opAllocObj->_tmp6);
+#endif
       if (opAllocObj->_result->is_valid())       do_output(opAllocObj->_result);
                                                  do_stub(opAllocObj->_stub);
       break;
@@ -742,7 +811,9 @@
       assert(opArrayCopy->_dst->is_valid(), "used");          do_input(opArrayCopy->_dst);     do_temp(opArrayCopy->_dst);
       assert(opArrayCopy->_dst_pos->is_valid(), "used");      do_input(opArrayCopy->_dst_pos); do_temp(opArrayCopy->_dst_pos);
       assert(opArrayCopy->_length->is_valid(), "used");       do_input(opArrayCopy->_length);  do_temp(opArrayCopy->_length);
+#ifndef MIPS32
       assert(opArrayCopy->_tmp->is_valid(), "used");          do_temp(opArrayCopy->_tmp);
+#endif
       if (opArrayCopy->_info)                     do_info(opArrayCopy->_info);
 
       // the implementation of arraycopy always has a call into the runtime
@@ -834,6 +905,9 @@
       if (opAllocArray->_tmp2->is_valid())            do_temp(opAllocArray->_tmp2);
       if (opAllocArray->_tmp3->is_valid())            do_temp(opAllocArray->_tmp3);
       if (opAllocArray->_tmp4->is_valid())            do_temp(opAllocArray->_tmp4);
+#ifdef MIPS32
+      if (opAllocArray->_tmp5->is_valid())            do_temp(opAllocArray->_tmp5);
+#endif
       if (opAllocArray->_result->is_valid())          do_output(opAllocArray->_result);
                                                       do_stub(opAllocArray->_stub);
       break;
@@ -1057,6 +1131,7 @@
 
 
 void LIR_List::load(LIR_Address* addr, LIR_Opr src, CodeEmitInfo* info, LIR_PatchCode patch_code) {
+
   append(new LIR_Op1(
             lir_move,
             LIR_OprFact::address(addr),
@@ -1078,13 +1153,24 @@
 }
 
 void LIR_List::volatile_load_unsafe_reg(LIR_Opr base, LIR_Opr offset, LIR_Opr dst, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code) {
-  append(new LIR_Op1(
+#ifndef MIPS32
+  append(new LIR_Op1(
             lir_move,
             LIR_OprFact::address(new LIR_Address(base, offset, type)),
             dst,
             type,
             patch_code,
             info, lir_move_volatile));
+#else
+  add(base, offset, base);
+  append(new LIR_Op1(
+            lir_move,
+            LIR_OprFact::address(new LIR_Address(base, 0, type)),
+            dst,
+            type,
+            patch_code,
+            info, lir_move_volatile));
+#endif
 }
 
 
@@ -1140,6 +1226,7 @@
 }
 
 void LIR_List::volatile_store_unsafe_reg(LIR_Opr src, LIR_Opr base, LIR_Opr offset, BasicType type, CodeEmitInfo* info, LIR_PatchCode patch_code) {
+#ifndef MIPS32
   append(new LIR_Op1(
             lir_move,
             src,
@@ -1147,6 +1234,18 @@
             type,
             patch_code,
             info, lir_move_volatile));
+#else
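+  // Same base + offset folding as in volatile_load_unsafe_reg above.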
+  add(base, offset, base);
+  append(new LIR_Op1(
+            lir_move,
+            src,
+	    LIR_OprFact::address(new LIR_Address(base, 0, type)),
+	    type,
+            patch_code,
+            info, lir_move_volatile));
+
+#endif
+
 }
 
 
@@ -1193,7 +1292,7 @@
                     info));
 }
 
-
+#ifndef MIPS32
 void LIR_List::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
   append(new LIR_Op2(
                     lir_cmp,
@@ -1212,7 +1311,9 @@
                     LIR_OprFact::address(addr),
                     info));
 }
+#endif
 
+#ifndef MIPS32
 void LIR_List::allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4,
                                int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub) {
   append(new LIR_OpAllocObj(
@@ -1240,6 +1341,42 @@
                            type,
                            stub));
 }
+#else
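+// MIPS32 variants: identical to the ones above, but they thread the extra
+// scratch operands (t5/t6 for objects, t5 for arrays) through to the LIR ops.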
+ void LIR_List::allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, LIR_Opr t5, LIR_Opr t6,
+				int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub) {
+	append(new LIR_OpAllocObj(
+				klass,
+				dst,
+				t1,
+				t2,
+				t3,
+				t4,
+				t5,
+				t6,
+				header_size,
+				object_size,
+				init_check,
+				stub));
+}
+
+void LIR_List::allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, LIR_Opr t3,LIR_Opr t4, LIR_Opr t5,
+				BasicType type, LIR_Opr klass, CodeStub* stub) {
+	append(new LIR_OpAllocArray(
+				klass,
+				len,
+				dst,
+				t1,
+				t2,
+				t3,
+				t4,
+				t5,
+				type,
+				stub));
+}
+
+#endif
+
+
 
 void LIR_List::shift_left(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp) {
  append(new LIR_Op2(
@@ -1310,38 +1447,82 @@
                           LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check,
                           CodeEmitInfo* info_for_exception, CodeEmitInfo* info_for_patch, CodeStub* stub,
                           ciMethod* profiled_method, int profiled_bci) {
-  append(new LIR_OpTypeCheck(lir_checkcast, result, object, klass,
-                             tmp1, tmp2, tmp3, fast_check, info_for_exception, info_for_patch, stub,
-                             profiled_method, profiled_bci));
+  append(new LIR_OpTypeCheck(lir_checkcast, 
+			result, 
+			object, 
+			klass,
+                	tmp1, 
+			tmp2, 
+			tmp3, 
+			fast_check, 
+			info_for_exception, 
+			info_for_patch, 
+			stub,
+                	profiled_method, profiled_bci));
 }
 
 
 void LIR_List::instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch) {
-  append(new LIR_OpTypeCheck(lir_instanceof, result, object, klass, tmp1, tmp2, tmp3, fast_check, NULL, info_for_patch, NULL, NULL, 0));
+  append(new LIR_OpTypeCheck(lir_instanceof, 
+			result, 
+			object, 
+			klass, 
+			tmp1, 
+			tmp2, 
+			tmp3, 
+			fast_check, 
+			NULL, 
+			info_for_patch, 
+			NULL, 
+			NULL, 
+			0));
 }
 
 
 void LIR_List::store_check(LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, CodeEmitInfo* info_for_exception) {
-  append(new LIR_OpTypeCheck(lir_store_check, object, array, tmp1, tmp2, tmp3, info_for_exception, NULL, 0));
+  append(new LIR_OpTypeCheck(lir_store_check, 
+			object, 
+			array, 
+			tmp1, 
+			tmp2, 
+			tmp3, 
+			info_for_exception, 
+			NULL, 
+			0));
 }
 
 
 void LIR_List::cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2) {
   // Compare and swap produces condition code "zero" if contents_of(addr) == cmp_value,
   // implying successful swap of new_value into addr
-  append(new LIR_OpCompareAndSwap(lir_cas_long, addr, cmp_value, new_value, t1, t2));
+  append(new LIR_OpCompareAndSwap(lir_cas_long, 
+			addr, 
+			cmp_value, 
+			new_value, 
+			t1, 
+			t2));
 }
 
 void LIR_List::cas_obj(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2) {
   // Compare and swap produces condition code "zero" if contents_of(addr) == cmp_value,
   // implying successful swap of new_value into addr
-  append(new LIR_OpCompareAndSwap(lir_cas_obj, addr, cmp_value, new_value, t1, t2));
+  append(new LIR_OpCompareAndSwap(lir_cas_obj, 
+			addr, 
+			cmp_value, 
+			new_value, 
+			t1, 
+			t2));
 }
 
 void LIR_List::cas_int(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2) {
   // Compare and swap produces condition code "zero" if contents_of(addr) == cmp_value,
   // implying successful swap of new_value into addr
-  append(new LIR_OpCompareAndSwap(lir_cas_int, addr, cmp_value, new_value, t1, t2));
+  append(new LIR_OpCompareAndSwap(lir_cas_int, 
+			addr, 
+			cmp_value, 
+			new_value, 
+			t1, 
+			t2));
 }
 
 
@@ -1421,6 +1602,8 @@
 // LIR_Address
 void LIR_Address::print_value_on(outputStream* out) const {
   out->print("Base:"); _base->print(out);
+
+#ifndef MIPS32
   if (!_index->is_illegal()) {
     out->print(" Index:"); _index->print(out);
     switch (scale()) {
@@ -1430,6 +1613,8 @@
     case times_8: out->print(" * 8"); break;
     }
   }
+#endif
+
   out->print(" Disp: %d", _disp);
 }
 
@@ -1702,20 +1887,26 @@
 
 // LIR_OpBranch
 void LIR_OpBranch::print_instr(outputStream* out) const {
-  print_condition(out, cond());             out->print(" ");
-  if (block() != NULL) {
-    out->print("[B%d] ", block()->block_id());
-  } else if (stub() != NULL) {
-    out->print("[");
-    stub()->print_name(out);
-    out->print(": 0x%x]", stub());
-    if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->bci());
-  } else {
-    out->print("[label:0x%x] ", label());
-  }
-  if (ublock() != NULL) {
-    out->print("unordered: [B%d] ", ublock()->block_id());
-  }
+	print_condition(out, cond());             out->print(" ");
+
+#ifdef MIPS32
+	in_opr1()->print(out);	out->print(" ");
+	in_opr2()->print(out);	out->print(" ");
+#endif
+	if (block() != NULL) {
+		out->print("[B%d] ", block()->block_id());
+	} else if (stub() != NULL) {
+		out->print("[");
+		stub()->print_name(out);
+		out->print(": 0x%x]", stub());
+		if (stub()->info() != NULL) 
+			out->print(" [bci:%d]", stub()->info()->bci());
+	} else {
+		out->print("[label:0x%x] ", label());
+	}
+	if (ublock() != NULL) {
+		out->print("unordered: [B%d] ", ublock()->block_id());
+	}
 }
 
 void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) {
@@ -1770,6 +1961,11 @@
   tmp2()->print(out);                       out->print(" ");
   tmp3()->print(out);                       out->print(" ");
   tmp4()->print(out);                       out->print(" ");
+
+#ifdef MIPS32
+  tmp5()->print(out);                       out->print(" ");
+  tmp6()->print(out);                       out->print(" ");
+#endif
   out->print("[hdr:%d]", header_size()); out->print(" ");
   out->print("[obj:%d]", object_size()); out->print(" ");
   out->print("[lbl:0x%x]", stub()->entry());
@@ -1783,9 +1979,12 @@
 
 // LIR_Op2
 void LIR_Op2::print_instr(outputStream* out) const {
+
+#ifndef MIPS32
   if (code() == lir_cmove) {
     print_condition(out, condition());         out->print(" ");
   }
+#endif
   in_opr1()->print(out);    out->print(" ");
   in_opr2()->print(out);    out->print(" ");
   if (tmp_opr()->is_valid()) { tmp_opr()->print(out);    out->print(" "); }
@@ -1800,6 +1999,11 @@
   tmp2()->print(out);                    out->print(" ");
   tmp3()->print(out);                    out->print(" ");
   tmp4()->print(out);                    out->print(" ");
+
+#ifdef MIPS32
+  tmp5()->print(out);                    out->print(" ");
+#endif
+
   out->print("[type:0x%x]", type());     out->print(" ");
   out->print("[label:0x%x]", stub()->entry());
 }
--- a/hotspot/src/share/vm/c1/c1_LIR.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_LIR.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -193,9 +193,8 @@
 
  private:
   friend class LIR_OprFact;
-
+  intptr_t value() const                         { return (intptr_t) this; }
   // Conversion
-  intptr_t value() const                         { return (intptr_t) this; }
 
   bool check_value_mask(intptr_t mask, intptr_t masked_value) const {
     return (value() & mask) == masked_value;
@@ -437,6 +436,14 @@
   FloatRegister as_double_reg  () const;
 #endif
 
+#ifdef MIPS32
+  FloatRegister as_float_reg   () const;
+  FloatRegister as_double_reg  () const;
+
+  FloatRegister as_fpu_lo   () const;
+  FloatRegister as_fpu_hi   () const;
+#endif
+
   jint      as_jint()    const { return as_constant_ptr()->as_jint(); }
   jlong     as_jlong()   const { return as_constant_ptr()->as_jlong(); }
   jfloat    as_jfloat()  const { return as_constant_ptr()->as_jfloat(); }
@@ -497,6 +504,14 @@
   BasicType _type;
 
  public:
+#ifdef MIPS32
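+  // MIPS32 addressing is base + displacement only; the index is always illegal
+  // and the scale is fixed at times_1.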
+  LIR_Address(LIR_Opr base, int disp, BasicType type):
+       _base(base)
+     , _disp(disp)
+     , _type(type)
+     , _index(LIR_OprDesc::illegalOpr())
+     , _scale(times_1) { assert(base->is_single_cpu(), "wrong base operand"); }
+#else
   LIR_Address(LIR_Opr base, LIR_Opr index, BasicType type):
        _base(base)
      , _index(index)
@@ -519,13 +534,19 @@
      , _type(type)
      , _disp(disp) { verify(); }
 #endif // X86
+#endif
 
   LIR_Opr base()  const                          { return _base;  }
   LIR_Opr index() const                          { return _index; }
   Scale   scale() const                          { return _scale; }
   intx    disp()  const                          { return _disp;  }
 
-  bool equals(LIR_Address* other) const          { return base() == other->base() && index() == other->index() && disp() == other->disp() && scale() == other->scale(); }
+  bool equals(LIR_Address* other) const          { 
+	return base() == other->base() && 
+		index() == other->index() && 
+		disp() == other->disp() && 
+		scale() == other->scale(); 
+  }
 
   virtual LIR_Address* as_address()              { return this;   }
   virtual BasicType type() const                 { return _type; }
@@ -566,6 +587,15 @@
                                                                              LIR_OprDesc::fpu_register         |
                                                                              LIR_OprDesc::double_size); }
 #endif
+
+#ifdef MIPS32
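+  // A MIPS32 double FPU operand stores the same register number in both
+  // register fields of the encoded operand.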
+  static LIR_Opr double_fpu(int reg)            { return (LIR_Opr)(intptr_t)((reg  << LIR_OprDesc::reg1_shift) | 
+                                                                             (reg  << LIR_OprDesc::reg2_shift) | 
+                                                                             LIR_OprDesc::double_type          | 
+                                                                             LIR_OprDesc::fpu_register         | 
+                                                                             LIR_OprDesc::double_size); }
+#endif
+
 #ifdef X86
   static LIR_Opr double_fpu(int reg)            { return (LIR_Opr)(intptr_t)((reg  << LIR_OprDesc::reg1_shift) |
                                                                              (reg  << LIR_OprDesc::reg2_shift) |
@@ -789,8 +819,10 @@
       , lir_return
       , lir_leal
       , lir_neg
+#ifndef MIPS32
       , lir_branch
       , lir_cond_float_branch
+#endif
       , lir_move
       , lir_prefetchr
       , lir_prefetchw
@@ -801,6 +833,10 @@
       , lir_safepoint
   , end_op1
   , begin_op2
+#ifdef MIPS32
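+      // On MIPS32 a branch is a fused compare-and-branch with two inputs,
+      // so the branch opcodes are placed in the two-operand range.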
+      , lir_branch
+      , lir_cond_float_branch
+#endif
       , lir_cmp
       , lir_cmp_l2i
       , lir_ucmp_fd2i
@@ -1229,7 +1265,7 @@
   virtual void verify() const;
 };
 
-
+#ifndef MIPS32
 class LIR_OpBranch: public LIR_Op {
  friend class LIR_OpVisitState;
 
@@ -1271,7 +1307,7 @@
   virtual LIR_OpBranch* as_OpBranch() { return this; }
   virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
 };
-
+#endif
 
 class ConversionStub;
 
@@ -1300,6 +1336,7 @@
 
 
 // LIR_OpAllocObj
+#ifndef MIPS32
 class LIR_OpAllocObj : public LIR_Op1 {
  friend class LIR_OpVisitState;
 
@@ -1342,7 +1379,56 @@
   virtual LIR_OpAllocObj * as_OpAllocObj () { return this; }
   virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
 };
+#else
+class LIR_OpAllocObj : public LIR_Op1 {
+ friend class LIR_OpVisitState;
 
+ private:
+  LIR_Opr _tmp1;
+  LIR_Opr _tmp2;
+  LIR_Opr _tmp3;
+  LIR_Opr _tmp4;
+  LIR_Opr _tmp5;
+  LIR_Opr _tmp6;
+  int     _hdr_size;
+  int     _obj_size;
+  CodeStub* _stub;
+  bool    _init_check;
+
+ public:
+  LIR_OpAllocObj(LIR_Opr klass, LIR_Opr result,
+                 LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4,LIR_Opr t5, LIR_Opr t6,
+                 int hdr_size, int obj_size, bool init_check, CodeStub* stub)
+    : LIR_Op1(lir_alloc_object, klass, result)
+    , _tmp1(t1)
+    , _tmp2(t2)
+    , _tmp3(t3)
+    , _tmp4(t4)
+    , _tmp5(t5)
+    , _tmp6(t6)
+    , _hdr_size(hdr_size)
+    , _obj_size(obj_size)
+    , _init_check(init_check)
+    , _stub(stub)                                { }
+
+  LIR_Opr klass()        const                   { return in_opr();     }
+  LIR_Opr obj()          const                   { return result_opr(); }
+  LIR_Opr tmp1()         const                   { return _tmp1;        }
+  LIR_Opr tmp2()         const                   { return _tmp2;        }
+  LIR_Opr tmp3()         const                   { return _tmp3;        }
+  LIR_Opr tmp4()         const                   { return _tmp4;        }
+  LIR_Opr tmp5()         const                   { return _tmp5;        }
+  LIR_Opr tmp6()         const                   { return _tmp6;        }
+  int     header_size()  const                   { return _hdr_size;    }
+  int     object_size()  const                   { return _obj_size;    }
+  bool    init_check()   const                   { return _init_check;  }
+  CodeStub* stub()       const                   { return _stub;        }
+
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_OpAllocObj * as_OpAllocObj () { return this; }
+  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+#endif
 
 // LIR_OpRoundFP
 class LIR_OpRoundFP : public LIR_Op1 {
@@ -1410,6 +1496,7 @@
 };
 
 // LIR_Op2
+#ifndef MIPS32
 class LIR_Op2: public LIR_Op {
  friend class LIR_OpVisitState;
 
@@ -1488,7 +1575,104 @@
   virtual LIR_Op2* as_Op2() { return this; }
   virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
 };
+#else
+class LIR_Op2: public LIR_Op {
+ //friend class LIR_Optimizer;
+  friend class LIR_OpVisitState;
+ protected:
+  LIR_Opr   _opr1;
+  LIR_Opr   _opr2;
+  BasicType _type;
+  LIR_Opr   _tmp;
 
+  virtual void verify() const;
+ public:
+  LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result = LIR_OprFact::illegalOpr,
+          CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL)
+    : LIR_Op(code, result, info), 
+			_opr1(opr1), _opr2(opr2), 
+			_type(type), 
+			_tmp(LIR_OprFact::illegalOpr)              {
+    assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
+  }
+
+  
+  LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp)
+    : LIR_Op(code, result, NULL), 
+			_opr1(opr1), _opr2(opr2), 
+			_type(T_ILLEGAL),
+			_tmp(tmp) {
+    assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
+  }
+
+  LIR_Opr in_opr1() const                        { return _opr1; }
+  LIR_Opr in_opr2() const                        { return _opr2; }
+  BasicType type()  const                        { return _type; }
+  LIR_Opr tmp_opr() const                        { return _tmp; }
+
+ 
+  void set_in_opr1(LIR_Opr opr)                  { _opr1 = opr; }
+  void set_in_opr2(LIR_Opr opr)                  { _opr2 = opr; }
+  // where is the definition of LIR_AbstractAssembler? 12/21/2006, jerome
+  //virtual void emit_code(LIR_AbstractAssembler* masm);
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_Op2* as_Op2() { return this; }
+ 
+  // virtual void print_instr() const PRODUCT_RETURN;
+  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+
+
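+// MIPS32: LIR_OpBranch derives from LIR_Op2 so that both compare operands
+// travel with the branch (MIPS compares and branches on registers directly).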
+class LIR_OpBranch: public LIR_Op2 {
+ friend class LIR_OpVisitState;
+
+ private:
+  LIR_Condition _cond;
+  BasicType     _type;
+  Label*        _label;
+  BlockBegin*   _block;  // if this is a branch to a block, this is the block
+  BlockBegin*   _ublock;  // if this is a float branch, this is the unordered block
+  CodeStub*     _stub;   // if this is a branch to a stub, this is the stub
+  
+ public:
+  // these are temporary constructors until we start using the conditional register  
+  LIR_OpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl)
+    : LIR_Op2(lir_branch, left, right, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL),
+      _cond(cond), _type(T_ILLEGAL), _label(lbl), _block(NULL), _ublock(NULL), _stub(NULL) {}
+ 
+
+  LIR_OpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block);
+
+  LIR_OpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, CodeStub* stub);
+
+  LIR_OpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* ublock);
+
+  LIR_Condition cond()        const              { return _cond;        }
+  BasicType     type()        const              { return _type;        }
+  LIR_Opr       left()        const              { return in_opr1();    }
+  LIR_Opr       right()       const              { return in_opr2();    }
+  Label*        label()       const              { return _label;       }
+  BlockBegin*   block()       const              { return _block;       }
+  BlockBegin*   ublock()      const              { return _ublock;      }
+  CodeStub*     stub()        const              { return _stub;        }
+
+
+  void          change_block(BlockBegin* b);
+  void          change_ublock(BlockBegin* b);
+  void          negate_cond();
+
+
+  // 12/21/06, jerome
+  //virtual void emit_code(LIR_AbstractAssembler* masm);
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_OpBranch* as_OpBranch() { return this; }
+  //virtual void print_instr() const PRODUCT_RETURN;
+  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+
+};
+#endif
+
+#ifndef MIPS32
 class LIR_OpAllocArray : public LIR_Op {
  friend class LIR_OpVisitState;
 
@@ -1528,7 +1712,50 @@
   virtual LIR_OpAllocArray * as_OpAllocArray () { return this; }
   virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
 };
+#else
+class LIR_OpAllocArray : public LIR_Op {
+ friend class LIR_OpVisitState;
 
+ private:
+  LIR_Opr   _klass;
+  LIR_Opr   _len;
+  LIR_Opr   _tmp1;
+  LIR_Opr   _tmp2;
+  LIR_Opr   _tmp3;
+  LIR_Opr   _tmp4;
+  LIR_Opr   _tmp5;
+  BasicType _type;
+  CodeStub* _stub;
+
+ public:
+  LIR_OpAllocArray(LIR_Opr klass, LIR_Opr len, LIR_Opr result, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4,  LIR_Opr t5, BasicType type, CodeStub* stub)
+    : LIR_Op(lir_alloc_array, result, NULL)
+    , _klass(klass)
+    , _len(len)
+    , _tmp1(t1)
+    , _tmp2(t2)
+    , _tmp3(t3)
+    , _tmp4(t4)
+    , _tmp5(t5)
+    , _type(type)
+    , _stub(stub) {}
+
+  LIR_Opr   klass()   const                      { return _klass;       }
+  LIR_Opr   len()     const                      { return _len;         }
+  LIR_Opr   obj()     const                      { return result_opr(); }
+  LIR_Opr   tmp1()    const                      { return _tmp1;        }
+  LIR_Opr   tmp2()    const                      { return _tmp2;        }
+  LIR_Opr   tmp3()    const                      { return _tmp3;        }
+  LIR_Opr   tmp4()    const                      { return _tmp4;        }
+  LIR_Opr   tmp5()    const                      { return _tmp5;        }
+  BasicType type()    const                      { return _type;        }
+  CodeStub* stub()    const                      { return _stub;        }
+
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_OpAllocArray * as_OpAllocArray () { return this; }
+  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+#endif
 
 class LIR_Op3: public LIR_Op {
  friend class LIR_OpVisitState;
@@ -1820,6 +2047,7 @@
   void push(LIR_Opr opr)                                   { append(new LIR_Op1(lir_push, opr)); }
   void pop(LIR_Opr reg)                                    { append(new LIR_Op1(lir_pop,  reg)); }
 
+#ifndef MIPS32
   void cmp(LIR_Condition condition, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info = NULL) {
     append(new LIR_Op2(lir_cmp, condition, left, right, info));
   }
@@ -1833,6 +2061,7 @@
   void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst) {
     append(new LIR_Op2(lir_cmove, condition, src1, src2, dst));
   }
+#endif
 
   void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2);
   void cas_obj(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2);
@@ -1872,16 +2101,29 @@
   void irem(LIR_Opr left, LIR_Opr right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info);
   void irem(LIR_Opr left, int   right, LIR_Opr res, LIR_Opr tmp, CodeEmitInfo* info);
 
+#ifndef MIPS32
   void allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub);
   void allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, LIR_Opr t3,LIR_Opr t4, BasicType type, LIR_Opr klass, CodeStub* stub);
-
+#else
+  void allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, LIR_Opr t5, LIR_Opr t6,int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub);
+  void allocate_array(LIR_Opr dst, LIR_Opr len, LIR_Opr t1,LIR_Opr t2, LIR_Opr t3,LIR_Opr t4, LIR_Opr t5,BasicType type, LIR_Opr klass, CodeStub* stub);
+#endif
   // jump is an unconditional branch
   void jump(BlockBegin* block) {
+#ifndef MIPS32
     append(new LIR_OpBranch(lir_cond_always, T_ILLEGAL, block));
+#else
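+    // MIPS32 branches always take explicit operands; an unconditional jump
+    // passes illegal operands.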
+    append(new LIR_OpBranch(lir_cond_always, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, T_ILLEGAL, block));
+#endif
   }
   void jump(CodeStub* stub) {
+#ifndef MIPS32
     append(new LIR_OpBranch(lir_cond_always, T_ILLEGAL, stub));
+#else
+    append(new LIR_OpBranch(lir_cond_always, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, T_ILLEGAL, stub));
+#endif
   }
+#ifndef MIPS32
   void branch(LIR_Condition cond, Label* lbl)        { append(new LIR_OpBranch(cond, lbl)); }
   void branch(LIR_Condition cond, BasicType type, BlockBegin* block) {
     assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons");
@@ -1895,7 +2137,25 @@
     assert(type == T_FLOAT || type == T_DOUBLE, "fp comparisons only");
     append(new LIR_OpBranch(cond, type, block, unordered));
   }
+#else
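+  // MIPS32 branch() takes the two values to compare; the backend then emits
+  // a fused compare-and-branch.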
+  void branch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl) {
+    append(new LIR_OpBranch(cond, left, right, lbl));
+  }
 
+  void branch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block) {
+    append(new LIR_OpBranch(cond, left, right, type, block));
+  }
+
+  void branch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type, CodeStub* stub) {
+    append(new LIR_OpBranch(cond, left, right, type, stub));
+  }
+
+  void branch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BasicType type,
+              BlockBegin* block, BlockBegin* unordered) {
+    append(new LIR_OpBranch(cond, left, right, type, block, unordered));
+  }
+
+#endif
   void shift_left(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp);
   void shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp);
   void unsigned_shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp);
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -27,43 +27,46 @@
 
 
 void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_code, Register obj, CodeEmitInfo* info) {
-  // we must have enough patching space so that call can be inserted
-  while ((intx) _masm->pc() - (intx) patch->pc_start() < NativeCall::instruction_size) {
-    _masm->nop();
-  }
-  patch->install(_masm, patch_code, obj, info);
-  append_patching_stub(patch);
+	// we must have enough patching space so that call can be inserted
+#ifndef MIPS32
+	while ((intx) _masm->pc() - (intx) patch->pc_start() < NativeCall::instruction_size) {
+		_masm->nop();
+	}
+#endif
+
+	patch->install(_masm, patch_code, obj, info);
+	append_patching_stub(patch);
 
 #ifdef ASSERT
-  Bytecodes::Code code = info->scope()->method()->java_code_at_bci(info->bci());
-  if (patch->id() == PatchingStub::access_field_id) {
-    switch (code) {
-      case Bytecodes::_putstatic:
-      case Bytecodes::_getstatic:
-      case Bytecodes::_putfield:
-      case Bytecodes::_getfield:
-        break;
-      default:
-        ShouldNotReachHere();
-    }
-  } else if (patch->id() == PatchingStub::load_klass_id) {
-    switch (code) {
-      case Bytecodes::_putstatic:
-      case Bytecodes::_getstatic:
-      case Bytecodes::_new:
-      case Bytecodes::_anewarray:
-      case Bytecodes::_multianewarray:
-      case Bytecodes::_instanceof:
-      case Bytecodes::_checkcast:
-      case Bytecodes::_ldc:
-      case Bytecodes::_ldc_w:
-        break;
-      default:
-        ShouldNotReachHere();
-    }
-  } else {
-    ShouldNotReachHere();
-  }
+	Bytecodes::Code code = info->scope()->method()->java_code_at_bci(info->bci());
+	if (patch->id() == PatchingStub::access_field_id) {
+		switch (code) {
+			case Bytecodes::_putstatic:
+			case Bytecodes::_getstatic:
+			case Bytecodes::_putfield:
+			case Bytecodes::_getfield:
+				break;
+			default:
+				ShouldNotReachHere();
+		}
+	} else if (patch->id() == PatchingStub::load_klass_id) {
+		switch (code) {
+			case Bytecodes::_putstatic:
+			case Bytecodes::_getstatic:
+			case Bytecodes::_new:
+			case Bytecodes::_anewarray:
+			case Bytecodes::_multianewarray:
+			case Bytecodes::_instanceof:
+			case Bytecodes::_checkcast:
+			case Bytecodes::_ldc:
+			case Bytecodes::_ldc_w:
+				break;
+			default:
+				ShouldNotReachHere();
+		}
+	} else {
+		ShouldNotReachHere();
+	}
 #endif
 }
 
@@ -72,15 +75,15 @@
 
 
 LIR_Assembler::LIR_Assembler(Compilation* c):
-   _compilation(c)
- , _masm(c->masm())
- , _bs(Universe::heap()->barrier_set())
- , _frame_map(c->frame_map())
- , _current_block(NULL)
- , _pending_non_safepoint(NULL)
- , _pending_non_safepoint_offset(0)
+	_compilation(c)
+	, _masm(c->masm())
+	, _bs(Universe::heap()->barrier_set())
+	, _frame_map(c->frame_map())
+	, _current_block(NULL)
+	, _pending_non_safepoint(NULL)
+	, _pending_non_safepoint_offset(0)
 {
-  _slow_case_stubs = new CodeStubList();
+	_slow_case_stubs = new CodeStubList();
 }
 
 
@@ -89,201 +92,204 @@
 
 
 void LIR_Assembler::append_patching_stub(PatchingStub* stub) {
-  _slow_case_stubs->append(stub);
+	_slow_case_stubs->append(stub);
 }
 
 
 void LIR_Assembler::check_codespace() {
-  CodeSection* cs = _masm->code_section();
-  if (cs->remaining() < (int)(1*K)) {
-    BAILOUT("CodeBuffer overflow");
-  }
+	CodeSection* cs = _masm->code_section();
+	if (cs->remaining() < (int)(1*K)) {
+		BAILOUT("CodeBuffer overflow");
+	}
 }
 
 
 void LIR_Assembler::emit_code_stub(CodeStub* stub) {
-  _slow_case_stubs->append(stub);
+	_slow_case_stubs->append(stub);
 }
 
 void LIR_Assembler::emit_stubs(CodeStubList* stub_list) {
-  for (int m = 0; m < stub_list->length(); m++) {
-    CodeStub* s = (*stub_list)[m];
 
-    check_codespace();
-    CHECK_BAILOUT();
+	for (int m = 0; m < stub_list->length(); m++) {
+		CodeStub* s = (*stub_list)[m];
+
+		check_codespace();
+		CHECK_BAILOUT();
 
 #ifndef PRODUCT
-    if (CommentedAssembly) {
-      stringStream st;
-      s->print_name(&st);
-      st.print(" slow case");
-      _masm->block_comment(st.as_string());
-    }
+		if (CommentedAssembly) {
+			stringStream st;
+			s->print_name(&st);
+			st.print(" slow case");
+			_masm->block_comment(st.as_string());
+		}
 #endif
-    s->emit_code(this);
+		s->emit_code(this);
 #ifdef ASSERT
-    s->assert_no_unbound_labels();
+		s->assert_no_unbound_labels();
 #endif
-  }
+	}
 }
 
 
 void LIR_Assembler::emit_slow_case_stubs() {
-  emit_stubs(_slow_case_stubs);
+	emit_stubs(_slow_case_stubs);
 }
 
 
 bool LIR_Assembler::needs_icache(ciMethod* method) const {
-  return !method->is_static();
+	return !method->is_static();
 }
 
 
 int LIR_Assembler::code_offset() const {
-  return _masm->offset();
+	return _masm->offset();
 }
 
 
 address LIR_Assembler::pc() const {
-  return _masm->pc();
+	return _masm->pc();
 }
 
 
 void LIR_Assembler::emit_exception_entries(ExceptionInfoList* info_list) {
-  for (int i = 0; i < info_list->length(); i++) {
-    XHandlers* handlers = info_list->at(i)->exception_handlers();
+	for (int i = 0; i < info_list->length(); i++) {
+		XHandlers* handlers = info_list->at(i)->exception_handlers();
 
-    for (int j = 0; j < handlers->length(); j++) {
-      XHandler* handler = handlers->handler_at(j);
-      assert(handler->lir_op_id() != -1, "handler not processed by LinearScan");
-      assert(handler->entry_code() == NULL ||
-             handler->entry_code()->instructions_list()->last()->code() == lir_branch ||
-             handler->entry_code()->instructions_list()->last()->code() == lir_delay_slot, "last operation must be branch");
+		for (int j = 0; j < handlers->length(); j++) {
+			XHandler* handler = handlers->handler_at(j);
+			assert(handler->lir_op_id() != -1, "handler not processed by LinearScan");
+			assert(handler->entry_code() == NULL ||
+					handler->entry_code()->instructions_list()->last()->code() == lir_branch ||
+					handler->entry_code()->instructions_list()->last()->code() == lir_delay_slot, "last operation must be branch");
 
-      if (handler->entry_pco() == -1) {
-        // entry code not emitted yet
-        if (handler->entry_code() != NULL && handler->entry_code()->instructions_list()->length() > 1) {
-          handler->set_entry_pco(code_offset());
-          if (CommentedAssembly) {
-            _masm->block_comment("Exception adapter block");
-          }
-          emit_lir_list(handler->entry_code());
-        } else {
-          handler->set_entry_pco(handler->entry_block()->exception_handler_pco());
-        }
+			if (handler->entry_pco() == -1) {
+				// entry code not emitted yet
+				if (handler->entry_code() != NULL && handler->entry_code()->instructions_list()->length() > 1) {
+					handler->set_entry_pco(code_offset());
+					if (CommentedAssembly) {
+						_masm->block_comment("Exception adapter block");
+					}
+					emit_lir_list(handler->entry_code());
+				} else {
+					handler->set_entry_pco(handler->entry_block()->exception_handler_pco());
+				}
 
-        assert(handler->entry_pco() != -1, "must be set now");
-      }
-    }
-  }
+				assert(handler->entry_pco() != -1, "must be set now");
+			}
+		}
+	}
 }
 
 
 void LIR_Assembler::emit_code(BlockList* hir) {
-  if (PrintLIR) {
-    print_LIR(hir);
-  }
+	if (PrintLIR) {
+		print_LIR(hir);
+	}
 
-  int n = hir->length();
-  for (int i = 0; i < n; i++) {
-    emit_block(hir->at(i));
-    CHECK_BAILOUT();
-  }
+	int n = hir->length();
+	for (int i = 0; i < n; i++) {
+		emit_block(hir->at(i));
+		CHECK_BAILOUT();
+	}
 
-  flush_debug_info(code_offset());
+	flush_debug_info(code_offset());
 
-  DEBUG_ONLY(check_no_unbound_labels());
+	DEBUG_ONLY(check_no_unbound_labels());
+	
 }
 
 
 void LIR_Assembler::emit_block(BlockBegin* block) {
-  if (block->is_set(BlockBegin::backward_branch_target_flag)) {
-    align_backward_branch_target();
-  }
+	if (block->is_set(BlockBegin::backward_branch_target_flag)) {
+		align_backward_branch_target();
+	}
 
-  // if this block is the start of an exception handler, record the
-  // PC offset of the first instruction for later construction of
-  // the ExceptionHandlerTable
-  if (block->is_set(BlockBegin::exception_entry_flag)) {
-    block->set_exception_handler_pco(code_offset());
-  }
+	// if this block is the start of an exception handler, record the
+	// PC offset of the first instruction for later construction of
+	// the ExceptionHandlerTable
+	if (block->is_set(BlockBegin::exception_entry_flag)) {
+		block->set_exception_handler_pco(code_offset());
+	}
 
 #ifndef PRODUCT
-  if (PrintLIRWithAssembly) {
-    // don't print Phi's
-    InstructionPrinter ip(false);
-    block->print(ip);
-  }
+	if (PrintLIRWithAssembly) {
+		// don't print Phi's
+		InstructionPrinter ip(false);
+		block->print(ip);
+	}
 #endif /* PRODUCT */
 
-  assert(block->lir() != NULL, "must have LIR");
-  X86_ONLY(assert(_masm->rsp_offset() == 0, "frame size should be fixed"));
+	assert(block->lir() != NULL, "must have LIR");
+	X86_ONLY(assert(_masm->rsp_offset() == 0, "frame size should be fixed"));
 
 #ifndef PRODUCT
-  if (CommentedAssembly) {
-    stringStream st;
-    st.print_cr(" block B%d [%d, %d]", block->block_id(), block->bci(), block->end()->bci());
-    _masm->block_comment(st.as_string());
-  }
+	if (CommentedAssembly) {
+		stringStream st;
+		st.print_cr(" block B%d [%d, %d]", block->block_id(), block->bci(), block->end()->bci());
+		_masm->block_comment(st.as_string());
+	}
 #endif
 
-  emit_lir_list(block->lir());
+	emit_lir_list(block->lir());
 
-  X86_ONLY(assert(_masm->rsp_offset() == 0, "frame size should be fixed"));
+	X86_ONLY(assert(_masm->rsp_offset() == 0, "frame size should be fixed"));
 }
 
 
 void LIR_Assembler::emit_lir_list(LIR_List* list) {
-  peephole(list);
 
-  int n = list->length();
-  for (int i = 0; i < n; i++) {
-    LIR_Op* op = list->at(i);
+	peephole(list);
+	int n = list->length();
+	for (int i = 0; i < n; i++) {
+		LIR_Op* op = list->at(i);
 
-    check_codespace();
-    CHECK_BAILOUT();
+		check_codespace();
+		CHECK_BAILOUT();
 
 #ifndef PRODUCT
-    if (CommentedAssembly) {
-      // Don't record out every op since that's too verbose.  Print
-      // branches since they include block and stub names.  Also print
-      // patching moves since they generate funny looking code.
-      if (op->code() == lir_branch ||
-          (op->code() == lir_move && op->as_Op1()->patch_code() != lir_patch_none)) {
-        stringStream st;
-        op->print_on(&st);
-        _masm->block_comment(st.as_string());
-      }
-    }
-    if (PrintLIRWithAssembly) {
-      // print out the LIR operation followed by the resulting assembly
-      list->at(i)->print(); tty->cr();
-    }
+		if (CommentedAssembly) {
+			// Don't record out every op since that's too verbose.  Print
+			// branches since they include block and stub names.  Also print
+			// patching moves since they generate funny looking code.
+			if (op->code() == lir_branch ||
+					(op->code() == lir_move && op->as_Op1()->patch_code() != lir_patch_none)) {
+				stringStream st;
+				op->print_on(&st);
+				_masm->block_comment(st.as_string());
+			}
+		}
+		if (PrintLIRWithAssembly) {
+			// print out the LIR operation followed by the resulting assembly
+			list->at(i)->print(); tty->cr();
+		}
 #endif /* PRODUCT */
 
-    op->emit_code(this);
+
+		op->emit_code(this);
 
-    if (compilation()->debug_info_recorder()->recording_non_safepoints()) {
-      process_debug_info(op);
-    }
+		if (compilation()->debug_info_recorder()->recording_non_safepoints()) {
+			process_debug_info(op);
+		}
 
 #ifndef PRODUCT
-    if (PrintLIRWithAssembly) {
-      _masm->code()->decode();
-    }
+		if (PrintLIRWithAssembly) {
+			_masm->code()->decode();
+		}
 #endif /* PRODUCT */
-  }
+	}
 }
 
 #ifdef ASSERT
 void LIR_Assembler::check_no_unbound_labels() {
-  CHECK_BAILOUT();
+	CHECK_BAILOUT();
 
-  for (int i = 0; i < _branch_target_blocks.length() - 1; i++) {
-    if (!_branch_target_blocks.at(i)->label()->is_bound()) {
-      tty->print_cr("label of block B%d is not bound", _branch_target_blocks.at(i)->block_id());
-      assert(false, "unbound label");
-    }
-  }
+	for (int i = 0; i < _branch_target_blocks.length() - 1; i++) {
+		if (!_branch_target_blocks.at(i)->label()->is_bound()) {
+			tty->print_cr("label of block B%d is not bound", _branch_target_blocks.at(i)->block_id());
+			assert(false, "unbound label");
+		}
+	}
 }
 #endif
 
@@ -291,505 +297,512 @@
 
 
 void LIR_Assembler::add_debug_info_for_branch(CodeEmitInfo* info) {
-  _masm->code_section()->relocate(pc(), relocInfo::poll_type);
-  int pc_offset = code_offset();
-  flush_debug_info(pc_offset);
-  info->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
-  if (info->exception_handlers() != NULL) {
-    compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers());
-  }
+	_masm->code_section()->relocate(pc(), relocInfo::poll_type);
+	int pc_offset = code_offset();
+	flush_debug_info(pc_offset);
+	info->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
+	if (info->exception_handlers() != NULL) {
+		compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers());
+	}
 }
 
 
 void LIR_Assembler::add_call_info(int pc_offset, CodeEmitInfo* cinfo) {
-  flush_debug_info(pc_offset);
-  cinfo->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
-  if (cinfo->exception_handlers() != NULL) {
-    compilation()->add_exception_handlers_for_pco(pc_offset, cinfo->exception_handlers());
-  }
+	flush_debug_info(pc_offset);
+	cinfo->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
+	if (cinfo->exception_handlers() != NULL) {
+		compilation()->add_exception_handlers_for_pco(pc_offset, cinfo->exception_handlers());
+	}
 }
 
 static ValueStack* debug_info(Instruction* ins) {
-  StateSplit* ss = ins->as_StateSplit();
-  if (ss != NULL) return ss->state();
-  return ins->lock_stack();
+	StateSplit* ss = ins->as_StateSplit();
+	if (ss != NULL) return ss->state();
+	return ins->lock_stack();
 }
 
 void LIR_Assembler::process_debug_info(LIR_Op* op) {
-  Instruction* src = op->source();
-  if (src == NULL)  return;
-  int pc_offset = code_offset();
-  if (_pending_non_safepoint == src) {
-    _pending_non_safepoint_offset = pc_offset;
-    return;
-  }
-  ValueStack* vstack = debug_info(src);
-  if (vstack == NULL)  return;
-  if (_pending_non_safepoint != NULL) {
-    // Got some old debug info.  Get rid of it.
-    if (_pending_non_safepoint->bci() == src->bci() &&
-        debug_info(_pending_non_safepoint) == vstack) {
-      _pending_non_safepoint_offset = pc_offset;
-      return;
-    }
-    if (_pending_non_safepoint_offset < pc_offset) {
-      record_non_safepoint_debug_info();
-    }
-    _pending_non_safepoint = NULL;
-  }
-  // Remember the debug info.
-  if (pc_offset > compilation()->debug_info_recorder()->last_pc_offset()) {
-    _pending_non_safepoint = src;
-    _pending_non_safepoint_offset = pc_offset;
-  }
+	Instruction* src = op->source();
+	if (src == NULL)  return;
+	int pc_offset = code_offset();
+	if (_pending_non_safepoint == src) {
+		_pending_non_safepoint_offset = pc_offset;
+		return;
+	}
+	ValueStack* vstack = debug_info(src);
+	if (vstack == NULL)  return;
+	if (_pending_non_safepoint != NULL) {
+		// Got some old debug info.  Get rid of it.
+		if (_pending_non_safepoint->bci() == src->bci() &&
+				debug_info(_pending_non_safepoint) == vstack) {
+			_pending_non_safepoint_offset = pc_offset;
+			return;
+		}
+		if (_pending_non_safepoint_offset < pc_offset) {
+			record_non_safepoint_debug_info();
+		}
+		_pending_non_safepoint = NULL;
+	}
+	// Remember the debug info.
+	if (pc_offset > compilation()->debug_info_recorder()->last_pc_offset()) {
+		_pending_non_safepoint = src;
+		_pending_non_safepoint_offset = pc_offset;
+	}
 }
 
 // Index caller states in s, where 0 is the oldest, 1 its callee, etc.
 // Return NULL if n is too large.
 // Returns the caller_bci for the next-younger state, also.
 static ValueStack* nth_oldest(ValueStack* s, int n, int& bci_result) {
-  ValueStack* t = s;
-  for (int i = 0; i < n; i++) {
-    if (t == NULL)  break;
-    t = t->caller_state();
-  }
-  if (t == NULL)  return NULL;
-  for (;;) {
-    ValueStack* tc = t->caller_state();
-    if (tc == NULL)  return s;
-    t = tc;
-    bci_result = s->scope()->caller_bci();
-    s = s->caller_state();
-  }
+	ValueStack* t = s;
+	for (int i = 0; i < n; i++) {
+		if (t == NULL)  break;
+		t = t->caller_state();
+	}
+	if (t == NULL)  return NULL;
+	for (;;) {
+		ValueStack* tc = t->caller_state();
+		if (tc == NULL)  return s;
+		t = tc;
+		bci_result = s->scope()->caller_bci();
+		s = s->caller_state();
+	}
 }
 
 void LIR_Assembler::record_non_safepoint_debug_info() {
-  int         pc_offset = _pending_non_safepoint_offset;
-  ValueStack* vstack    = debug_info(_pending_non_safepoint);
-  int         bci       = _pending_non_safepoint->bci();
+	int         pc_offset = _pending_non_safepoint_offset;
+	ValueStack* vstack    = debug_info(_pending_non_safepoint);
+	int         bci       = _pending_non_safepoint->bci();
 
-  DebugInformationRecorder* debug_info = compilation()->debug_info_recorder();
-  assert(debug_info->recording_non_safepoints(), "sanity");
+	DebugInformationRecorder* debug_info = compilation()->debug_info_recorder();
+	assert(debug_info->recording_non_safepoints(), "sanity");
 
-  debug_info->add_non_safepoint(pc_offset);
+	debug_info->add_non_safepoint(pc_offset);
 
-  // Visit scopes from oldest to youngest.
-  for (int n = 0; ; n++) {
-    int s_bci = bci;
-    ValueStack* s = nth_oldest(vstack, n, s_bci);
-    if (s == NULL)  break;
-    IRScope* scope = s->scope();
-    debug_info->describe_scope(pc_offset, scope->method(), s_bci);
-  }
+	// Visit scopes from oldest to youngest.
+	for (int n = 0; ; n++) {
+		int s_bci = bci;
+		ValueStack* s = nth_oldest(vstack, n, s_bci);
+		if (s == NULL)  break;
+		IRScope* scope = s->scope();
+		debug_info->describe_scope(pc_offset, scope->method(), s_bci);
+	}
 
-  debug_info->end_non_safepoint(pc_offset);
+	debug_info->end_non_safepoint(pc_offset);
 }
 
 
 void LIR_Assembler::add_debug_info_for_null_check_here(CodeEmitInfo* cinfo) {
-  add_debug_info_for_null_check(code_offset(), cinfo);
+	add_debug_info_for_null_check(code_offset(), cinfo);
 }
 
 void LIR_Assembler::add_debug_info_for_null_check(int pc_offset, CodeEmitInfo* cinfo) {
-  ImplicitNullCheckStub* stub = new ImplicitNullCheckStub(pc_offset, cinfo);
-  emit_code_stub(stub);
+	ImplicitNullCheckStub* stub = new ImplicitNullCheckStub(pc_offset, cinfo);
+	emit_code_stub(stub);
 }
 
 void LIR_Assembler::add_debug_info_for_div0_here(CodeEmitInfo* info) {
-  add_debug_info_for_div0(code_offset(), info);
+	add_debug_info_for_div0(code_offset(), info);
 }
 
 void LIR_Assembler::add_debug_info_for_div0(int pc_offset, CodeEmitInfo* cinfo) {
-  DivByZeroStub* stub = new DivByZeroStub(pc_offset, cinfo);
-  emit_code_stub(stub);
+	DivByZeroStub* stub = new DivByZeroStub(pc_offset, cinfo);
+	emit_code_stub(stub);
 }
 
 void LIR_Assembler::emit_rtcall(LIR_OpRTCall* op) {
-  rt_call(op->result_opr(), op->addr(), op->arguments(), op->tmp(), op->info());
+	rt_call(op->result_opr(), op->addr(), op->arguments(), op->tmp(), op->info());
 }
 
 
 void LIR_Assembler::emit_call(LIR_OpJavaCall* op) {
-  verify_oop_map(op->info());
+	verify_oop_map(op->info());
 
-  if (os::is_MP()) {
-    // must align calls sites, otherwise they can't be updated atomically on MP hardware
-    align_call(op->code());
-  }
+	if (os::is_MP()) {
+		// must align calls sites, otherwise they can't be updated atomically on MP hardware
+		align_call(op->code());
+	}
 
-  // emit the static call stub stuff out of line
-  emit_static_call_stub();
+	// emit the static call stub stuff out of line
+	emit_static_call_stub();
 
-  switch (op->code()) {
-  case lir_static_call:
-    call(op->addr(), relocInfo::static_call_type, op->info());
-    break;
-  case lir_optvirtual_call:
-    call(op->addr(), relocInfo::opt_virtual_call_type, op->info());
-    break;
-  case lir_icvirtual_call:
-    ic_call(op->addr(), op->info());
-    break;
-  case lir_virtual_call:
-    vtable_call(op->vtable_offset(), op->info());
-    break;
-  default: ShouldNotReachHere();
-  }
+	switch (op->code()) {
+		case lir_static_call:
+			call(op->addr(), relocInfo::static_call_type, op->info());
+			break;
+		case lir_optvirtual_call:
+			call(op->addr(), relocInfo::opt_virtual_call_type, op->info());
+			break;
+		case lir_icvirtual_call:
+			ic_call(op->addr(), op->info());
+			break;
+		case lir_virtual_call:
+			vtable_call(op->vtable_offset(), op->info());
+			break;
+		default: ShouldNotReachHere();
+	}
 #if defined(X86) && defined(TIERED)
-  // C2 leave fpu stack dirty clean it
-  if (UseSSE < 2) {
-    int i;
-    for ( i = 1; i <= 7 ; i++ ) {
-      ffree(i);
-    }
-    if (!op->result_opr()->is_float_kind()) {
-      ffree(0);
-    }
-  }
+	// C2 leave fpu stack dirty clean it
+	if (UseSSE < 2) {
+		int i;
+		for ( i = 1; i <= 7 ; i++ ) {
+			ffree(i);
+		}
+		if (!op->result_opr()->is_float_kind()) {
+			ffree(0);
+		}
+	}
 #endif // X86 && TIERED
 }
 
 
 void LIR_Assembler::emit_opLabel(LIR_OpLabel* op) {
-  _masm->bind (*(op->label()));
+	_masm->bind (*(op->label()));
 }
 
 
 void LIR_Assembler::emit_op1(LIR_Op1* op) {
-  switch (op->code()) {
-    case lir_move:
-      if (op->move_kind() == lir_move_volatile) {
-        assert(op->patch_code() == lir_patch_none, "can't patch volatiles");
-        volatile_move_op(op->in_opr(), op->result_opr(), op->type(), op->info());
-      } else {
-        move_op(op->in_opr(), op->result_opr(), op->type(),
-                op->patch_code(), op->info(), op->pop_fpu_stack(), op->move_kind() == lir_move_unaligned);
-      }
-      break;
+	switch (op->code()) {
+		case lir_move:
+			if (op->move_kind() == lir_move_volatile) {
+				assert(op->patch_code() == lir_patch_none, "can't patch volatiles");
+				volatile_move_op(op->in_opr(), op->result_opr(), op->type(), op->info());
+			} else {
+				move_op(op->in_opr(), op->result_opr(), op->type(),
+						op->patch_code(), op->info(), op->pop_fpu_stack(), op->move_kind() == lir_move_unaligned);
+			}
+			break;
 
-    case lir_prefetchr:
-      prefetchr(op->in_opr());
-      break;
+		case lir_prefetchr:
+			prefetchr(op->in_opr());
+			break;
 
-    case lir_prefetchw:
-      prefetchw(op->in_opr());
-      break;
+		case lir_prefetchw:
+			prefetchw(op->in_opr());
+			break;
 
-    case lir_roundfp: {
-      LIR_OpRoundFP* round_op = op->as_OpRoundFP();
-      roundfp_op(round_op->in_opr(), round_op->tmp(), round_op->result_opr(), round_op->pop_fpu_stack());
-      break;
-    }
+		case lir_roundfp: {
+					  LIR_OpRoundFP* round_op = op->as_OpRoundFP();
+					  roundfp_op(round_op->in_opr(), round_op->tmp(), round_op->result_opr(), round_op->pop_fpu_stack());
+					  break;
+				  }
 
-    case lir_return:
-      return_op(op->in_opr());
-      break;
+		case lir_return:
+				  return_op(op->in_opr());
+				  break;
 
-    case lir_safepoint:
-      if (compilation()->debug_info_recorder()->last_pc_offset() == code_offset()) {
-        _masm->nop();
-      }
-      safepoint_poll(op->in_opr(), op->info());
-      break;
+		case lir_safepoint:
+				  if (compilation()->debug_info_recorder()->last_pc_offset() == code_offset()) {
+					  _masm->nop();
+				  }
+				  safepoint_poll(op->in_opr(), op->info());
+				  break;
 
-    case lir_fxch:
-      fxch(op->in_opr()->as_jint());
-      break;
+		case lir_fxch:
+				  fxch(op->in_opr()->as_jint());
+				  break;
 
-    case lir_fld:
-      fld(op->in_opr()->as_jint());
-      break;
+		case lir_fld:
+				  fld(op->in_opr()->as_jint());
+				  break;
 
-    case lir_ffree:
-      ffree(op->in_opr()->as_jint());
-      break;
+		case lir_ffree:
+				  ffree(op->in_opr()->as_jint());
+				  break;
 
-    case lir_branch:
-      break;
+		case lir_branch:
+				  break;
 
-    case lir_push:
-      push(op->in_opr());
-      break;
+		case lir_push:
+				  push(op->in_opr());
+				  break;
 
-    case lir_pop:
-      pop(op->in_opr());
-      break;
+		case lir_pop:
+				  pop(op->in_opr());
+				  break;
 
-    case lir_neg:
-      negate(op->in_opr(), op->result_opr());
-      break;
+		case lir_neg:
+				  negate(op->in_opr(), op->result_opr());
+				  break;
 
-    case lir_leal:
-      leal(op->in_opr(), op->result_opr());
-      break;
+		case lir_leal:
+				  leal(op->in_opr(), op->result_opr());
+				  break;
 
-    case lir_null_check:
-      if (GenerateCompilerNullChecks) {
-        add_debug_info_for_null_check_here(op->info());
+		case lir_null_check:
+				  if (GenerateCompilerNullChecks) {
+					  add_debug_info_for_null_check_here(op->info());
 
-        if (op->in_opr()->is_single_cpu()) {
-          _masm->null_check(op->in_opr()->as_register());
-        } else {
-          Unimplemented();
-        }
-      }
-      break;
+					  if (op->in_opr()->is_single_cpu()) {
+						  _masm->null_check(op->in_opr()->as_register());
+					  } else {
+						  Unimplemented();
+					  }
+				  }
+				  break;
 
-    case lir_monaddr:
-      monitor_address(op->in_opr()->as_constant_ptr()->as_jint(), op->result_opr());
-      break;
+		case lir_monaddr:
+				  monitor_address(op->in_opr()->as_constant_ptr()->as_jint(), op->result_opr());
+				  break;
 
-    default:
-      Unimplemented();
-      break;
-  }
+		default:
+				  Unimplemented();
+				  break;
+	}
 }
 
 
 void LIR_Assembler::emit_op0(LIR_Op0* op) {
-  switch (op->code()) {
-    case lir_word_align: {
-      while (code_offset() % BytesPerWord != 0) {
-        _masm->nop();
-      }
-      break;
-    }
+	switch (op->code()) {
+		case lir_word_align: {
+					     while (code_offset() % BytesPerWord != 0) {
+						     _masm->nop();
+					     }
+					     break;
+				     }
 
-    case lir_nop:
-      assert(op->info() == NULL, "not supported");
-      _masm->nop();
-      break;
+		case lir_nop:
+				     assert(op->info() == NULL, "not supported");
+				     _masm->nop();
+				     break;
 
-    case lir_label:
-      Unimplemented();
-      break;
+		case lir_label:
+				     Unimplemented();
+				     break;
 
-    case lir_build_frame:
-      build_frame();
-      break;
+		case lir_build_frame:
+				     build_frame();
+				     break;
 
-    case lir_std_entry:
-      // init offsets
-      offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset());
-      _masm->align(CodeEntryAlignment);
-      if (needs_icache(compilation()->method())) {
-        check_icache();
-      }
-      offsets()->set_value(CodeOffsets::Verified_Entry, _masm->offset());
-      _masm->verified_entry();
-      build_frame();
-      offsets()->set_value(CodeOffsets::Frame_Complete, _masm->offset());
-      break;
+		case lir_std_entry:
+				     // init offsets
+				     offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset());
+				     _masm->align(CodeEntryAlignment);
+				     if (needs_icache(compilation()->method())) {
+					     check_icache();
+				     }
+				     offsets()->set_value(CodeOffsets::Verified_Entry, _masm->offset());
+				     _masm->verified_entry();
+				     build_frame();
+				     offsets()->set_value(CodeOffsets::Frame_Complete, _masm->offset());
+				     break;
 
-    case lir_osr_entry:
-      offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset());
-      osr_entry();
-      break;
+		case lir_osr_entry:
+				     offsets()->set_value(CodeOffsets::OSR_Entry, _masm->offset());
+				     osr_entry();
+				     break;
 
-    case lir_24bit_FPU:
-      set_24bit_FPU();
-      break;
+		case lir_24bit_FPU:
+				     set_24bit_FPU();
+				     break;
 
-    case lir_reset_FPU:
-      reset_FPU();
-      break;
+		case lir_reset_FPU:
+				     reset_FPU();
+				     break;
 
-    case lir_breakpoint:
-      breakpoint();
-      break;
+		case lir_breakpoint:
+				     breakpoint();
+				     break;
 
-    case lir_fpop_raw:
-      fpop();
-      break;
+		case lir_fpop_raw:
+				     fpop();
+				     break;
 
-    case lir_membar:
-      membar();
-      break;
+		case lir_membar:
+				     membar();
+				     break;
 
-    case lir_membar_acquire:
-      membar_acquire();
-      break;
+		case lir_membar_acquire:
+				     membar_acquire();
+				     break;
 
-    case lir_membar_release:
-      membar_release();
-      break;
+		case lir_membar_release:
+				     membar_release();
+				     break;
 
-    case lir_get_thread:
-      get_thread(op->result_opr());
-      break;
+		case lir_get_thread:
+				     get_thread(op->result_opr());
+				     break;
 
-    default:
-      ShouldNotReachHere();
-      break;
-  }
+		default:
+				     ShouldNotReachHere();
+				     break;
+	}
 }
 
 
 void LIR_Assembler::emit_op2(LIR_Op2* op) {
-  switch (op->code()) {
-    case lir_cmp:
-      if (op->info() != NULL) {
-        assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(),
-               "shouldn't be codeemitinfo for non-address operands");
-        add_debug_info_for_null_check_here(op->info()); // exception possible
-      }
-      comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
-      break;
+	switch (op->code()) {
+		case lir_cmp:
+#ifndef MIPS32
+			if (op->info() != NULL) {
+				assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(),
+						"shouldn't be codeemitinfo for non-address operands");
+				add_debug_info_for_null_check_here(op->info()); // exception possible
+			}
+			comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
+#else
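+			// The MIPS32 front end never emits a standalone lir_cmp (LIR_List::cmp is
+			// compiled out and the branch carries both operands), so nothing to do here.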
+
+#endif
+			break;
 
-    case lir_cmp_l2i:
-    case lir_cmp_fd2i:
-    case lir_ucmp_fd2i:
-      comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
-      break;
-
-    case lir_cmove:
-      cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr());
-      break;
+		case lir_cmp_l2i:
+		case lir_cmp_fd2i:
+		case lir_ucmp_fd2i:
+			comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
+			break;
 
-    case lir_shl:
-    case lir_shr:
-    case lir_ushr:
-      if (op->in_opr2()->is_constant()) {
-        shift_op(op->code(), op->in_opr1(), op->in_opr2()->as_constant_ptr()->as_jint(), op->result_opr());
-      } else {
-        shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp_opr());
-      }
-      break;
+		case lir_cmove:
+#ifndef MIPS32
+			cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr());
+#else
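+			// MIPS32 does not generate lir_cmove (LIR_List::cmove is compiled out),
+			// so there is nothing to emit here.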
+#endif
+			break;
+
+		case lir_shl:
+		case lir_shr:
+		case lir_ushr:
+			if (op->in_opr2()->is_constant()) {
+				shift_op(op->code(), op->in_opr1(), op->in_opr2()->as_constant_ptr()->as_jint(), op->result_opr());
+			} else {
+				shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp_opr());
+			}
+			break;
 
-    case lir_add:
-    case lir_sub:
-    case lir_mul:
-    case lir_mul_strictfp:
-    case lir_div:
-    case lir_div_strictfp:
-    case lir_rem:
-      assert(op->fpu_pop_count() < 2, "");
-      arith_op(
-        op->code(),
-        op->in_opr1(),
-        op->in_opr2(),
-        op->result_opr(),
-        op->info(),
-        op->fpu_pop_count() == 1);
-      break;
+		case lir_add:
+		case lir_sub:
+		case lir_mul:
+		case lir_mul_strictfp:
+		case lir_div:
+		case lir_div_strictfp:
+		case lir_rem:
+			assert(op->fpu_pop_count() < 2, "");
+			arith_op(
+					op->code(),
+					op->in_opr1(),
+					op->in_opr2(),
+					op->result_opr(),
+					op->info(),
+					op->fpu_pop_count() == 1);
+			break;
 
-    case lir_abs:
-    case lir_sqrt:
-    case lir_sin:
-    case lir_tan:
-    case lir_cos:
-    case lir_log:
-    case lir_log10:
-      intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
-      break;
+		case lir_abs:
+		case lir_sqrt:
+		case lir_sin:
+		case lir_tan:
+		case lir_cos:
+		case lir_log:
+		case lir_log10:
+			intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
+			break;
 
-    case lir_logic_and:
-    case lir_logic_or:
-    case lir_logic_xor:
-      logic_op(
-        op->code(),
-        op->in_opr1(),
-        op->in_opr2(),
-        op->result_opr());
-      break;
+		case lir_logic_and:
+		case lir_logic_or:
+		case lir_logic_xor:
+			logic_op(
+					op->code(),
+					op->in_opr1(),
+					op->in_opr2(),
+					op->result_opr());
+			break;
 
-    case lir_throw:
-    case lir_unwind:
-      throw_op(op->in_opr1(), op->in_opr2(), op->info(), op->code() == lir_unwind);
-      break;
+		case lir_throw:
+		case lir_unwind:
+			throw_op(op->in_opr1(), op->in_opr2(), op->info(), op->code() == lir_unwind);
+			break;
 
-    default:
-      Unimplemented();
-      break;
-  }
+		default:
+			Unimplemented();
+			break;
+	}
 }
 
 
 void LIR_Assembler::build_frame() {
-  _masm->build_frame(initial_frame_size_in_bytes());
+	_masm->build_frame(initial_frame_size_in_bytes());
 }
 
 
 void LIR_Assembler::roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_fpu_stack) {
-  assert((src->is_single_fpu() && dest->is_single_stack()) ||
-         (src->is_double_fpu() && dest->is_double_stack()),
-         "round_fp: rounds register -> stack location");
+	assert((src->is_single_fpu() && dest->is_single_stack()) ||
+			(src->is_double_fpu() && dest->is_double_stack()),
+			"round_fp: rounds register -> stack location");
 
-  reg2stack (src, dest, src->type(), pop_fpu_stack);
+	reg2stack (src, dest, src->type(), pop_fpu_stack);
 }
 
 
 void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool unaligned) {
-  if (src->is_register()) {
-    if (dest->is_register()) {
-      assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
-      reg2reg(src,  dest);
-    } else if (dest->is_stack()) {
-      assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
-      reg2stack(src, dest, type, pop_fpu_stack);
-    } else if (dest->is_address()) {
-      reg2mem(src, dest, type, patch_code, info, pop_fpu_stack, unaligned);
-    } else {
-      ShouldNotReachHere();
-    }
+	if (src->is_register()) {
+		if (dest->is_register()) {
+			assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
+			reg2reg(src,  dest);
+		} else if (dest->is_stack()) {
+			assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
+			reg2stack(src, dest, type, pop_fpu_stack);
+		} else if (dest->is_address()) {
+			reg2mem(src, dest, type, patch_code, info, pop_fpu_stack, unaligned);
+		} else {
+			ShouldNotReachHere();
+		}
 
-  } else if (src->is_stack()) {
-    assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
-    if (dest->is_register()) {
-      stack2reg(src, dest, type);
-    } else if (dest->is_stack()) {
-      stack2stack(src, dest, type);
-    } else {
-      ShouldNotReachHere();
-    }
+	} else if (src->is_stack()) {
+		assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
+		if (dest->is_register()) {
+			stack2reg(src, dest, type);
+		} else if (dest->is_stack()) {
+			stack2stack(src, dest, type);
+		} else {
+			ShouldNotReachHere();
+		}
 
-  } else if (src->is_constant()) {
-    if (dest->is_register()) {
-      const2reg(src, dest, patch_code, info); // patching is possible
-    } else if (dest->is_stack()) {
-      assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
-      const2stack(src, dest);
-    } else if (dest->is_address()) {
-      assert(patch_code == lir_patch_none, "no patching allowed here");
-      const2mem(src, dest, type, info);
-    } else {
-      ShouldNotReachHere();
-    }
+	} else if (src->is_constant()) {
+		if (dest->is_register()) {
+			const2reg(src, dest, patch_code, info); // patching is possible
+		} else if (dest->is_stack()) {
+			assert(patch_code == lir_patch_none && info == NULL, "no patching and info allowed here");
+			const2stack(src, dest);
+		} else if (dest->is_address()) {
+			assert(patch_code == lir_patch_none, "no patching allowed here");
+			const2mem(src, dest, type, info);
+		} else {
+			ShouldNotReachHere();
+		}
 
-  } else if (src->is_address()) {
-    mem2reg(src, dest, type, patch_code, info, unaligned);
+	} else if (src->is_address()) {
+		mem2reg(src, dest, type, patch_code, info, unaligned);
 
-  } else {
-    ShouldNotReachHere();
-  }
+	} else {
+		ShouldNotReachHere();
+	}
 }
 
 
 void LIR_Assembler::verify_oop_map(CodeEmitInfo* info) {
 #ifndef PRODUCT
-  if (VerifyOopMaps || VerifyOops) {
-    bool v = VerifyOops;
-    VerifyOops = true;
-    OopMapStream s(info->oop_map());
-    while (!s.is_done()) {
-      OopMapValue v = s.current();
-      if (v.is_oop()) {
-        VMReg r = v.reg();
-        if (!r->is_stack()) {
-          stringStream st;
-          st.print("bad oop %s at %d", r->as_Register()->name(), _masm->offset());
+	if (VerifyOopMaps || VerifyOops) {
+		bool v = VerifyOops;
+		VerifyOops = true;
+		OopMapStream s(info->oop_map());
+		while (!s.is_done()) {
+			OopMapValue v = s.current();
+			if (v.is_oop()) {
+				VMReg r = v.reg();
+				if (!r->is_stack()) {
+					stringStream st;
+					st.print("bad oop %s at %d", r->as_Register()->name(), _masm->offset());
 #ifdef SPARC
-          _masm->_verify_oop(r->as_Register(), strdup(st.as_string()), __FILE__, __LINE__);
+					_masm->_verify_oop(r->as_Register(), strdup(st.as_string()), __FILE__, __LINE__);
 #else
-          _masm->verify_oop(r->as_Register());
+					_masm->verify_oop(r->as_Register());
 #endif
-        } else {
-          _masm->verify_stack_oop(r->reg2stack() * VMRegImpl::stack_slot_size);
-        }
-      }
-      s.next();
-    }
-    VerifyOops = v;
-  }
+				} else {
+					_masm->verify_stack_oop(r->reg2stack() * VMRegImpl::stack_slot_size);
+				}
+			}
+			s.next();
+		}
+		VerifyOops = v;
+	}
 #endif
 }
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -170,7 +170,6 @@
 
 void PhiResolver::move(LIR_Opr src, LIR_Opr dest) {
   assert(dest->is_virtual(), "");
-  // tty->print("move "); src->print(); tty->print(" to "); dest->print(); tty->cr();
   assert(src->is_valid(), "");
   assert(dest->is_valid(), "");
   ResolveNode* source = source_node(src);
@@ -285,7 +284,14 @@
 
 
 void LIRGenerator::init() {
-  _bs = Universe::heap()->barrier_set();
+	_bs = Universe::heap()->barrier_set();
+#ifdef MIPS32
+	assert(_bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+	CardTableModRefBS* ct = (CardTableModRefBS*)_bs;
+	assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+	_card_table_base = new LIR_Const((intptr_t)ct->byte_map_base);
+#endif
 }
 
 
@@ -447,15 +453,28 @@
 
 void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index,
                                     CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) {
-  CodeStub* stub = new RangeCheckStub(range_check_info, index);
-  if (index->is_constant()) {
-    cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(),
-                index->as_jint(), null_check_info);
-    __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch
+	CodeStub* stub = new RangeCheckStub(range_check_info, index);
+	if (index->is_constant()) {
+#ifndef MIPS32
+	cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(),
+		index->as_jint(), null_check_info);
+	__ branch(lir_cond_belowEqual, T_INT, stub); // forward branch
+#else
+	LIR_Opr left = LIR_OprFact::address(new LIR_Address(array, arrayOopDesc::length_offset_in_bytes(), T_INT));
+	LIR_Opr right = LIR_OprFact::intConst(index->as_jint());
+	__ branch(lir_cond_belowEqual,left, right ,T_INT, stub); // forward branch
+
+#endif
   } else {
-    cmp_reg_mem(lir_cond_aboveEqual, index, array,
-                arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info);
-    __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch
+#ifndef MIPS32
+	cmp_reg_mem(lir_cond_aboveEqual, index, array,
+		arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info);
+	__ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch
+#else
+	LIR_Opr left = index;
+	LIR_Opr right = LIR_OprFact::address(new LIR_Address( array, arrayOopDesc::length_offset_in_bytes(), T_INT));
+	__ branch(lir_cond_aboveEqual,left, right ,T_INT, stub); // forward branch
+#endif
   }
 }
 
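The transformation above recurs throughout this file: the x86-oriented LIR emits a compare that sets condition codes followed by a branch that consumes them, while the MIPS32 path carries both operands on the branch itself, since MIPS has no integer condition codes and the back end wants to emit the compare and the branch together. This is also why the lir_cmp and lir_cmove arms of the LIR_Assembler switch earlier in this changeset are left empty under MIPS32. A side note on the conditions used here: the bounds check relies on an unsigned comparison so one branch covers both failure modes. A minimal, self-contained sketch of that trick (plain C++ with hypothetical names, not HotSpot API):

    #include <cstdint>

    // A negative index becomes a huge unsigned value, so a single unsigned
    // "index >= length" test rejects both index < 0 and index >= length.
    bool out_of_bounds(int32_t index, int32_t length) {
      return (uint32_t)index >= (uint32_t)length;
    }
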
@@ -463,14 +482,28 @@
 void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) {
   CodeStub* stub = new RangeCheckStub(info, index, true);
   if (index->is_constant()) {
-    cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info);
-    __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch
+#ifndef MIPS32
+	cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info);
+	__ branch(lir_cond_belowEqual, T_INT, stub); // forward branch
+#else
+	LIR_Opr left = LIR_OprFact::address(new LIR_Address(buffer, java_nio_Buffer::limit_offset(),T_INT));
+	LIR_Opr right = LIR_OprFact::intConst(index->as_jint());
+	__ branch(lir_cond_belowEqual,left, right ,T_INT, stub); // forward branch
+
+#endif
   } else {
+#ifndef MIPS32
     cmp_reg_mem(lir_cond_aboveEqual, index, buffer,
                 java_nio_Buffer::limit_offset(), T_INT, info);
     __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch
+#else
+	LIR_Opr left = index;
+	LIR_Opr right = LIR_OprFact::address(new LIR_Address( buffer, java_nio_Buffer::limit_offset(), T_INT));
+	__ branch(lir_cond_aboveEqual,left, right ,T_INT, stub); // forward branch
+
+#endif
   }
-  __ move(index, result);
+	__ move(index, result);
 }
 
 
@@ -582,7 +615,6 @@
     __ move(value, result_op);
     value = result_op;
   }
-
   assert(count->is_constant() || count->is_register(), "must be");
   switch(code) {
   case Bytecodes::_ishl:
@@ -638,8 +670,12 @@
   __ unlock_object(hdr, object, lock, slow_path);
 }
 
-
+#ifndef MIPS32
 void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info) {
+#else
+void LIRGenerator::new_instance(LIR_Opr dst, ciInstanceKlass* klass, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, 
+				LIR_Opr scratch4, LIR_Opr scratch5, LIR_Opr scratch6,LIR_Opr klass_reg, CodeEmitInfo* info) {
+#endif
   jobject2reg_with_patching(klass_reg, klass, info);
   // If klass is not loaded we do not know if the klass has finalizers:
   if (UseFastNewInstance && klass->is_loaded()
@@ -653,12 +689,23 @@
     // allocate space for instance
     assert(klass->size_helper() >= 0, "illegal instance size");
     const int instance_size = align_object_size(klass->size_helper());
+#ifndef MIPS32
     __ allocate_object(dst, scratch1, scratch2, scratch3, scratch4,
                        oopDesc::header_size(), instance_size, klass_reg, !klass->is_initialized(), slow_path);
+#else
+    __ allocate_object(dst, scratch1, scratch2, scratch3, scratch4, scratch5, scratch6,
+			oopDesc::header_size(), instance_size, klass_reg, !klass->is_initialized(), slow_path);
+
+#endif
   } else {
     CodeStub* slow_path = new NewInstanceStub(klass_reg, dst, klass, info, Runtime1::new_instance_id);
+#ifndef MIPS32
     __ branch(lir_cond_always, T_ILLEGAL, slow_path);
     __ branch_destination(slow_path->continuation());
+#else
+    __ branch(lir_cond_always, LIR_OprFact::illegalOpr,  LIR_OprFact::illegalOpr, T_ILLEGAL, slow_path);
+    __ branch_destination(slow_path->continuation());
+#endif
   }
 }
 
@@ -815,7 +862,7 @@
   return tmp;
 }
 
-
+#ifndef MIPS32
 void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) {
   if (if_instr->should_profile()) {
     ciMethod* method = if_instr->profiled_method();
@@ -846,7 +893,49 @@
     __ move(data_reg, LIR_OprFact::address(data_addr));
   }
 }
-
+#else
+void LIRGenerator::profile_branch(If* if_instr, If::Condition cond , LIR_Opr left, LIR_Opr right) {
+	if (if_instr->should_profile()) {
+		ciMethod* method = if_instr->profiled_method();
+		assert(method != NULL, "method should be set if branch is profiled");
+		ciMethodData* md = method->method_data();
+		if (md == NULL) {
+			bailout("out of memory building methodDataOop");
+			return;
+		}
+		ciProfileData* data = md->bci_to_data(if_instr->profiled_bci());
+		assert(data != NULL, "must have profiling data");
+		assert(data->is_BranchData(), "need BranchData for two-way branches");
+		int taken_count_offset     = md->byte_offset_of_slot(data, BranchData::taken_offset());
+		int not_taken_count_offset = md->byte_offset_of_slot(data, BranchData::not_taken_offset());
+		LIR_Opr md_reg = new_register(T_OBJECT);
+		__ move(LIR_OprFact::oopConst(md->encoding()), md_reg);
+		LIR_Opr data_offset_reg = new_register(T_INT);
+
+		LIR_Opr opr1 =  LIR_OprFact::intConst(taken_count_offset);
+		LIR_Opr opr2 =  LIR_OprFact::intConst(not_taken_count_offset);
+		LabelObj* skip = new LabelObj();
+
+		__ move(opr1, data_offset_reg);
+		__ branch( lir_cond(cond), left, right, skip->label());
+		__ move(opr2, data_offset_reg);
+		__ branch_destination(skip->label());
+
+		LIR_Opr data_reg = new_register(T_INT);
+		LIR_Opr tmp_reg = new_register(T_INT);
+		// LIR_Address* data_addr = new LIR_Address(md_reg, data_offset_reg, T_INT);
+		__ move(data_offset_reg, tmp_reg);
+		__ add(tmp_reg, md_reg, tmp_reg);
+		LIR_Address* data_addr = new LIR_Address(tmp_reg, 0, T_INT);
+		__ move(LIR_OprFact::address(data_addr), data_reg);
+		LIR_Address* fake_incr_value = new LIR_Address(data_reg, DataLayout::counter_increment, T_INT);
+		// Use leal instead of add to avoid destroying condition codes on x86
+		__ leal(LIR_OprFact::address(fake_incr_value), data_reg);
+		__ move(data_reg, LIR_OprFact::address(data_addr));
+	}
+}
+
+#endif
 
 // Phi technique:
 // This is about passing live values from one basic block to the other.
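Without a conditional move, the MIPS32 profile_branch variant added above selects which profile counter to bump by loading the taken-count offset, branching over a second move that would overwrite it with the not-taken offset, and then incrementing the selected slot through the leal/move pair kept from the x86 version (the increment is expressed as a fake address with DataLayout::counter_increment as displacement). A self-contained model of the computation, with hypothetical names standing in for the MethodData layout (the real code works on LIR operands and labels, not booleans):

    #include <cstdint>

    void bump_branch_counter(uint8_t* method_data, int taken_offset, int not_taken_offset,
                             bool condition_holds, int32_t increment) {
      int offset = taken_offset;                   // assume the condition holds
      if (!condition_holds) offset = not_taken_offset;  // the emitted branch skips this move
      int32_t* counter = (int32_t*)(method_data + offset);
      *counter += increment;                       // done with leal + move in the LIR
    }
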
@@ -920,6 +1009,7 @@
 
 
 LIR_Opr LIRGenerator::new_register(BasicType type) {
+
   int vreg = _virtual_register_number;
   // add a little fudge factor for the bailout, since the bailout is
   // only checked periodically.  This gives a few extra registers to
@@ -979,6 +1069,36 @@
   return NULL;
 }
 
+#ifdef MIPS32
+void LIRGenerator::write_barrier(LIR_Opr addr) {
+	if (addr->is_address()) {
+	LIR_Address* address = (LIR_Address*)addr;
+	LIR_Opr ptr = new_register(T_OBJECT);
+	if (!address->index()->is_valid() && address->disp() == 0) {
+		__ move(address->base(), ptr);
+	} else {
+		__ leal(addr, ptr);
+	}
+		addr = ptr;
+	}
+	assert(addr->is_register(), "must be a register at this point");
+
+	LIR_Opr tmp = new_register(T_OBJECT);
+	if (TwoOperandLIRForm) {
+		__ move(addr, tmp);
+		__ unsigned_shift_right(tmp, CardTableModRefBS::card_shift, tmp);
+	} else {
+		__ unsigned_shift_right(addr, CardTableModRefBS::card_shift, tmp);
+	}
+	if (can_inline_as_constant(card_table_base())) {
+		__ move(LIR_OprFact::intConst(0), new LIR_Address(tmp, card_table_base()->as_jint(), T_BYTE));
+	} else {
+		__ add(tmp, load_constant(card_table_base()), tmp);
+		__ move(LIR_OprFact::intConst(0), new LIR_Address(tmp, 0, T_BYTE));
+	}
+}
+#endif
+
 
 void LIRGenerator::do_ExceptionObject(ExceptionObject* x) {
   assert(block()->is_set(BlockBegin::exception_entry_flag), "ExceptionObject only allowed in exception handler block");
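The MIPS32-only write_barrier helper added above is the usual card-table mark: shift the store address right by the card shift to get a card index, then store zero (the dirty value) at byte_map_base plus that index; _card_table_base, cached in init(), holds the byte_map_base pointer, which HotSpot biases so that addr >> shift indexes it directly. A minimal stand-alone model of the arithmetic, assuming the usual byte-per-card table and a 9-bit card shift (512-byte cards):

    #include <cstdint>

    const int kCardShift = 9;   // assumed value of CardTableModRefBS::card_shift

    // byte_map_base is pre-biased so indexing with (addr >> shift) works for any
    // heap address; 0 is taken to be the dirty-card value.
    void dirty_card(uint8_t* byte_map_base, uintptr_t store_addr) {
      byte_map_base[store_addr >> kCardShift] = 0;
    }
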
@@ -1306,15 +1426,21 @@
 
   LIR_Opr pre_val = new_register(T_OBJECT);
 
-  __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
   if (!addr_opr->is_address()) {
     assert(addr_opr->is_register(), "must be");
     addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT));
   }
   CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code,
                                         info);
+
+#ifndef MIPS32
+  __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
   __ branch(lir_cond_notEqual, T_INT, slow);
+#else
+  __ branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow);
+#endif
   __ branch_destination(slow->continuation());
+
 }
 
 void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
@@ -1352,6 +1478,7 @@
   LIR_Opr xor_shift_res = new_pointer_register();
 
   if (TwoOperandLIRForm ) {
+
     __ move(addr, xor_res);
     __ logical_xor(xor_res, new_val, xor_res);
     __ move(xor_res, xor_shift_res);
@@ -1374,10 +1501,15 @@
   }
   assert(new_val->is_register(), "must be a register at this point");
 
-  __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
 
   CodeStub* slow = new G1PostBarrierStub(addr, new_val);
+#ifndef MIPS32
+  __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
   __ branch(lir_cond_notEqual, T_INT, slow);
+#else
+
+  __ branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), T_INT, slow);
+#endif
   __ branch_destination(slow->continuation());
 }
 
@@ -1412,8 +1544,9 @@
     __ move(LIR_OprFact::intConst(0),
               new LIR_Address(tmp, card_table_base->as_jint(), T_BYTE));
   } else {
+    __ add(tmp, load_constant(card_table_base), tmp);
     __ move(LIR_OprFact::intConst(0),
-              new LIR_Address(tmp, load_constant(card_table_base),
+              new LIR_Address(tmp, 0,
                               T_BYTE));
   }
 }
@@ -1627,12 +1760,24 @@
     CodeEmitInfo* info = state_for(x);
     CodeStub* stub = new RangeCheckStub(info, index.result(), true);
     if (index.result()->is_constant()) {
+#ifndef MIPS32
       cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info);
       __ branch(lir_cond_belowEqual, T_INT, stub);
+#else
+	LIR_Opr left = LIR_OprFact::address(new LIR_Address( buf.result(), java_nio_Buffer::limit_offset(),T_INT));
+	LIR_Opr right = LIR_OprFact::intConst(index.result()->as_jint());
+	__ branch(lir_cond_belowEqual,left, right ,T_INT, stub); // forward branch
+#endif
     } else {
+#ifndef MIPS32
       cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(),
                   java_nio_Buffer::limit_offset(), T_INT, info);
       __ branch(lir_cond_aboveEqual, T_INT, stub);
+#else
+	LIR_Opr right = LIR_OprFact::address(new LIR_Address( buf.result(), java_nio_Buffer::limit_offset(),T_INT));
+	LIR_Opr left =  index.result();
+	__ branch(lir_cond_aboveEqual,left, right ,T_INT, stub); // forward branch
+#endif
     }
     __ move(index.result(), result);
   } else {
@@ -1704,8 +1849,12 @@
     if (use_length) {
       // TODO: use a (modified) version of array_range_check that does not require a
       //       constant length to be loaded to a register
+#ifndef MIPS32
       __ cmp(lir_cond_belowEqual, length.result(), index.result());
       __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+#else
+	__ branch(lir_cond_belowEqual, length.result(), index.result(),T_INT, new RangeCheckStub(range_check_info, index.result()));
+#endif
     } else {
       array_range_check(array.result(), index.result(), null_check_info, range_check_info);
       // The range check performs the null check, so clear it out for the load
@@ -1862,11 +2011,20 @@
     addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type);
 #else
     if (index_op->is_illegal() || log2_scale == 0) {
+#ifndef MIPS32
       addr = new LIR_Address(base_op, index_op, dst_type);
+#else
+	LIR_Opr ptr = new_register(T_INT);
+	__ move(base_op, ptr);
+	if (index_op->is_valid())
+		__ add(ptr, index_op, ptr);
+	addr = new LIR_Address(ptr, 0, dst_type);
+#endif
     } else {
       LIR_Opr tmp = new_register(T_INT);
       __ shift_left(index_op, log2_scale, tmp);
-      addr = new LIR_Address(base_op, tmp, dst_type);
+      __ add(base_op, tmp, base_op);
+      addr = new LIR_Address(base_op, 0, dst_type);
     }
 #endif
   }
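Because the LIR_Address form used by this port carries only a base register and a constant displacement, the raw-memory paths above first fold the (optionally scaled) index into a register with explicit shift and add operations and then build a base-plus-zero address. The effective address being materialized is simply the following (illustrative only; the LIR version does this with shift_left and add on operands):

    #include <cstdint>

    uintptr_t effective_address(uintptr_t base, intptr_t index, int log2_scale) {
      return base + ((uintptr_t)index << log2_scale);
    }
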
@@ -1923,9 +2081,17 @@
     __ move(idx.result(), index_op);
     __ shift_left(index_op, log2_scale, index_op);
   }
-
-  LIR_Address* addr = new LIR_Address(base_op, index_op, x->basic_type());
-  __ move(value.result(), addr);
+#ifndef MIPS32
+	LIR_Address* addr = new LIR_Address(base_op, index_op, x->basic_type());
+#else
+	LIR_Opr ptr = new_register(T_INT);
+	__ move(base_op, ptr);
+	if (index_op->is_valid())
+		__ add(ptr, index_op, ptr);
+	LIR_Address* addr = new LIR_Address(ptr, 0,  x->basic_type());
+
+#endif
+	__ move(value.result(), addr);
 }
 
 
@@ -2004,20 +2170,36 @@
     int high_key = one_range->high_key();
     BlockBegin* dest = one_range->sux();
     if (low_key == high_key) {
+#ifndef MIPS32
       __ cmp(lir_cond_equal, value, low_key);
       __ branch(lir_cond_equal, T_INT, dest);
+#else
+      __ branch(lir_cond_equal, value, LIR_OprFact::intConst(low_key), T_INT, dest);
+#endif
     } else if (high_key - low_key == 1) {
+#ifndef MIPS32
       __ cmp(lir_cond_equal, value, low_key);
       __ branch(lir_cond_equal, T_INT, dest);
       __ cmp(lir_cond_equal, value, high_key);
       __ branch(lir_cond_equal, T_INT, dest);
+#else
+      __ branch(lir_cond_equal, value, LIR_OprFact::intConst(low_key), T_INT, dest);
+      __ branch(lir_cond_equal, value, LIR_OprFact::intConst(high_key), T_INT, dest);
+
+#endif
     } else {
       LabelObj* L = new LabelObj();
+#ifndef MIPS32
       __ cmp(lir_cond_less, value, low_key);
       __ branch(lir_cond_less, L->label());
       __ cmp(lir_cond_lessEqual, value, high_key);
       __ branch(lir_cond_lessEqual, T_INT, dest);
       __ branch_destination(L->label());
+#else
+      __ branch(lir_cond_less, value, LIR_OprFact::intConst(low_key), L->label());
+      __ branch(lir_cond_lessEqual, value, LIR_OprFact::intConst(high_key), T_INT, dest);
+      __ branch_destination(L->label());
+#endif
     }
   }
   __ jump(default_sux);
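do_SwitchRanges dispatches each compacted key range with the cheapest test available: one equality branch for a single-key range, two equality branches for a two-key range, and a low/high window test otherwise; the MIPS32 variants again carry both operands on the branch. A self-contained model of the per-range decision (hypothetical names; the emitted code branches to successor blocks and falls through to the next range instead of returning):

    // Returns true if 'value' belongs to this range and control should go to
    // the range's successor block.
    bool range_matches(int value, int low_key, int high_key) {
      if (low_key == high_key)     return value == low_key;
      if (high_key - low_key == 1) return value == low_key || value == high_key;
      return value >= low_key && value <= high_key;
    }
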
@@ -2104,8 +2286,12 @@
     do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux());
   } else {
     for (int i = 0; i < len; i++) {
+#ifndef MIPS32
       __ cmp(lir_cond_equal, value, i + lo_key);
       __ branch(lir_cond_equal, T_INT, x->sux_at(i));
+#else
+      __ branch(lir_cond_equal, value, LIR_OprFact::intConst(i+lo_key), T_INT, x->sux_at(i));
+#endif
     }
     __ jump(x->default_sux());
   }
@@ -2130,8 +2316,13 @@
   } else {
     int len = x->length();
     for (int i = 0; i < len; i++) {
+#ifndef MIPS32
       __ cmp(lir_cond_equal, value, x->key_at(i));
       __ branch(lir_cond_equal, T_INT, x->sux_at(i));
+#else
+      __ branch(lir_cond_equal, value, LIR_OprFact::intConst(x->key_at(i)), T_INT, x->sux_at(i));
+#endif
+
     }
     __ jump(x->default_sux());
   }
@@ -2443,8 +2634,18 @@
   f_val.dont_load_item();
   LIR_Opr reg = rlock_result(x);
 
+#ifndef MIPS32
   __ cmp(lir_cond(x->cond()), left.result(), right.result());
   __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg);
+#else
+	LIR_Opr opr1 =  t_val.result();
+	LIR_Opr opr2 =  f_val.result();
+	LabelObj* skip = new LabelObj();
+	__ move(opr1, reg);
+	__ branch(lir_cond(x->cond()), left.result(), right.result(), skip->label());
+	__ move(opr2, reg);
+	__ branch_destination(skip->label());
+#endif
 }
 
 
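do_IfOp is where the absence of a conditional move on MIPS32 shows most directly: instead of cmp followed by cmove, the result register is loaded with the true value and a branch skips the move of the false value whenever the condition holds. A stand-alone model of that lowering (illustrative; the real code operates on LIR operands):

    int select(bool condition_holds, int true_value, int false_value) {
      int result = true_value;                       // speculative move
      if (!condition_holds) result = false_value;    // the emitted branch jumps over this move
      return result;
    }
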
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -175,6 +175,9 @@
   GrowableArray<LIR_Const*>       _constants;
   LIR_OprList                     _reg_for_constants;
   Values                          _unpinned_constants;
+#ifdef MIPS32
+  LIR_Const*                      _card_table_base;
+#endif
 
   friend class PhiResolver;
 
@@ -195,6 +198,10 @@
   // get a constant into a register and get track of what register was used
   LIR_Opr load_constant(Constant* x);
   LIR_Opr load_constant(LIR_Const* constant);
+#ifdef MIPS32
+  LIR_Const* card_table_base() const { return _card_table_base; }
+#endif
+
 
   void  set_result(Value x, LIR_Opr opr)           {
     assert(opr->is_valid(), "must set to valid value");
@@ -213,7 +220,11 @@
   LIR_Opr round_item(LIR_Opr opr);
   LIR_Opr force_to_spill(LIR_Opr value, BasicType t);
 
+#ifndef MIPS32
   void  profile_branch(If* if_instr, If::Condition cond);
+#else
+  void  profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right);
+#endif
 
   PhiResolverState& resolver_state() { return _resolver_state; }
 
@@ -316,7 +327,15 @@
   void monitor_enter (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no, CodeEmitInfo* info_for_exception, CodeEmitInfo* info);
   void monitor_exit  (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, int monitor_no);
 
-  void new_instance    (LIR_Opr  dst, ciInstanceKlass* klass, LIR_Opr  scratch1, LIR_Opr  scratch2, LIR_Opr  scratch3,  LIR_Opr scratch4, LIR_Opr  klass_reg, CodeEmitInfo* info);
+#ifndef MIPS32
+  void new_instance    (LIR_Opr  dst, ciInstanceKlass* klass, LIR_Opr  scratch1, LIR_Opr  scratch2, LIR_Opr  scratch3,  
+			LIR_Opr scratch4, LIR_Opr  klass_reg, CodeEmitInfo* info);
+#else
+   void new_instance    (LIR_Opr  dst, ciInstanceKlass* klass, LIR_Opr  scratch1, LIR_Opr  scratch2, LIR_Opr  scratch3,  
+			LIR_Opr scratch4, LIR_Opr scratch5,  LIR_Opr scratch6,  LIR_Opr  klass_reg, CodeEmitInfo* info);
+
+#endif
+
 
   // machine dependent
   void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info);
@@ -338,6 +357,10 @@
   }
   LIR_Address* emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type, bool needs_card_mark);
 
+#ifdef MIPS32
+  void write_barrier(LIR_Opr addr);
+#endif
+
   // machine preferences and characteristics
   bool can_inline_as_constant(Value i) const;
   bool can_inline_as_constant(LIR_Const* c) const;
--- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,25 +29,25 @@
 
 #ifndef PRODUCT
 
-  static LinearScanStatistic _stat_before_alloc;
-  static LinearScanStatistic _stat_after_asign;
-  static LinearScanStatistic _stat_final;
-
-  static LinearScanTimers _total_timer;
-
-  // helper macro for short definition of timer
-  #define TIME_LINEAR_SCAN(timer_name)  TraceTime _block_timer("", _total_timer.timer(LinearScanTimers::timer_name), TimeLinearScan || TimeEachLinearScan, Verbose);
-
-  // helper macro for short definition of trace-output inside code
-  #define TRACE_LINEAR_SCAN(level, code)       \
-    if (TraceLinearScanLevel >= level) {       \
-      code;                                    \
-    }
+static LinearScanStatistic _stat_before_alloc;
+static LinearScanStatistic _stat_after_asign;
+static LinearScanStatistic _stat_final;
+
+static LinearScanTimers _total_timer;
+
+// helper macro for short definition of timer
+#define TIME_LINEAR_SCAN(timer_name)  TraceTime _block_timer("", _total_timer.timer(LinearScanTimers::timer_name), TimeLinearScan || TimeEachLinearScan, Verbose);
+
+// helper macro for short definition of trace-output inside code
+#define TRACE_LINEAR_SCAN(level, code)       \
+	if (TraceLinearScanLevel >= level) {       \
+		code;                                    \
+	}
 
 #else
 
-  #define TIME_LINEAR_SCAN(timer_name)
-  #define TRACE_LINEAR_SCAN(level, code)
+#define TIME_LINEAR_SCAN(timer_name)
+#define TRACE_LINEAR_SCAN(level, code)
 
 #endif
 
@@ -60,38 +61,38 @@
 
 // Implementation of LinearScan
 
-LinearScan::LinearScan(IR* ir, LIRGenerator* gen, FrameMap* frame_map)
- : _compilation(ir->compilation())
- , _ir(ir)
- , _gen(gen)
- , _frame_map(frame_map)
- , _num_virtual_regs(gen->max_virtual_register_number())
- , _has_fpu_registers(false)
- , _num_calls(-1)
- , _max_spills(0)
- , _unused_spill_slot(-1)
- , _intervals(0)   // initialized later with correct length
- , _new_intervals_from_allocation(new IntervalList())
- , _sorted_intervals(NULL)
- , _lir_ops(0)     // initialized later with correct length
- , _block_of_op(0) // initialized later with correct length
- , _has_info(0)
- , _has_call(0)
- , _scope_value_cache(0) // initialized later with correct length
- , _interval_in_loop(0, 0) // initialized later with correct length
- , _cached_blocks(*ir->linear_scan_order())
+	LinearScan::LinearScan(IR* ir, LIRGenerator* gen, FrameMap* frame_map)
+	: _compilation(ir->compilation())
+	, _ir(ir)
+	, _gen(gen)
+	, _frame_map(frame_map)
+	, _num_virtual_regs(gen->max_virtual_register_number())
+	, _has_fpu_registers(false)
+	, _num_calls(-1)
+	, _max_spills(0)
+	, _unused_spill_slot(-1)
+	, _intervals(0)   // initialized later with correct length
+	, _new_intervals_from_allocation(new IntervalList())
+	, _sorted_intervals(NULL)
+	, _lir_ops(0)     // initialized later with correct length
+	, _block_of_op(0) // initialized later with correct length
+	, _has_info(0)
+	, _has_call(0)
+	, _scope_value_cache(0) // initialized later with correct length
+	, _interval_in_loop(0, 0) // initialized later with correct length
+	  , _cached_blocks(*ir->linear_scan_order())
 #ifdef X86
- , _fpu_stack_allocator(NULL)
+	  , _fpu_stack_allocator(NULL)
 #endif
 {
-  // note: to use more than on instance of LinearScan at a time this function call has to
-  //       be moved somewhere outside of this constructor:
-  Interval::initialize();
-
-  assert(this->ir() != NULL,          "check if valid");
-  assert(this->compilation() != NULL, "check if valid");
-  assert(this->gen() != NULL,         "check if valid");
-  assert(this->frame_map() != NULL,   "check if valid");
+	// note: to use more than one instance of LinearScan at a time this function call has to
+	//       be moved somewhere outside of this constructor:
+	Interval::initialize();
+
+	assert(this->ir() != NULL,          "check if valid");
+	assert(this->compilation() != NULL, "check if valid");
+	assert(this->gen() != NULL,         "check if valid");
+	assert(this->frame_map() != NULL,   "check if valid");
 }
 
 
@@ -107,91 +108,91 @@
 //       is done in calc_operand_for_interval()
 
 int LinearScan::reg_num(LIR_Opr opr) {
-  assert(opr->is_register(), "should not call this otherwise");
-
-  if (opr->is_virtual_register()) {
-    assert(opr->vreg_number() >= nof_regs, "found a virtual register with a fixed-register number");
-    return opr->vreg_number();
-  } else if (opr->is_single_cpu()) {
-    return opr->cpu_regnr();
-  } else if (opr->is_double_cpu()) {
-    return opr->cpu_regnrLo();
+	assert(opr->is_register(), "should not call this otherwise");
+
+	if (opr->is_virtual_register()) {
+		assert(opr->vreg_number() >= nof_regs, "found a virtual register with a fixed-register number");
+		return opr->vreg_number();
+	} else if (opr->is_single_cpu()) {
+		return opr->cpu_regnr();
+	} else if (opr->is_double_cpu()) {
+		return opr->cpu_regnrLo();
 #ifdef X86
-  } else if (opr->is_single_xmm()) {
-    return opr->fpu_regnr() + pd_first_xmm_reg;
-  } else if (opr->is_double_xmm()) {
-    return opr->fpu_regnrLo() + pd_first_xmm_reg;
+	} else if (opr->is_single_xmm()) {
+		return opr->fpu_regnr() + pd_first_xmm_reg;
+	} else if (opr->is_double_xmm()) {
+		return opr->fpu_regnrLo() + pd_first_xmm_reg;
 #endif
-  } else if (opr->is_single_fpu()) {
-    return opr->fpu_regnr() + pd_first_fpu_reg;
-  } else if (opr->is_double_fpu()) {
-    return opr->fpu_regnrLo() + pd_first_fpu_reg;
-  } else {
-    ShouldNotReachHere();
-    return -1;
-  }
+	} else if (opr->is_single_fpu()) {
+		return opr->fpu_regnr() + pd_first_fpu_reg;
+	} else if (opr->is_double_fpu()) {
+		return opr->fpu_regnrLo() + pd_first_fpu_reg;
+	} else {
+		ShouldNotReachHere();
+		return -1;
+	}
 }
 
 int LinearScan::reg_numHi(LIR_Opr opr) {
-  assert(opr->is_register(), "should not call this otherwise");
-
-  if (opr->is_virtual_register()) {
-    return -1;
-  } else if (opr->is_single_cpu()) {
-    return -1;
-  } else if (opr->is_double_cpu()) {
-    return opr->cpu_regnrHi();
+	assert(opr->is_register(), "should not call this otherwise");
+
+	if (opr->is_virtual_register()) {
+		return -1;
+	} else if (opr->is_single_cpu()) {
+		return -1;
+	} else if (opr->is_double_cpu()) {
+		return opr->cpu_regnrHi();
 #ifdef X86
-  } else if (opr->is_single_xmm()) {
-    return -1;
-  } else if (opr->is_double_xmm()) {
-    return -1;
+	} else if (opr->is_single_xmm()) {
+		return -1;
+	} else if (opr->is_double_xmm()) {
+		return -1;
 #endif
-  } else if (opr->is_single_fpu()) {
-    return -1;
-  } else if (opr->is_double_fpu()) {
-    return opr->fpu_regnrHi() + pd_first_fpu_reg;
-  } else {
-    ShouldNotReachHere();
-    return -1;
-  }
+	} else if (opr->is_single_fpu()) {
+		return -1;
+	} else if (opr->is_double_fpu()) {
+		return opr->fpu_regnrHi() + pd_first_fpu_reg;
+	} else {
+		ShouldNotReachHere();
+		return -1;
+	}
 }
 
 
 // ********** functions for classification of intervals
 
 bool LinearScan::is_precolored_interval(const Interval* i) {
-  return i->reg_num() < LinearScan::nof_regs;
+	return i->reg_num() < LinearScan::nof_regs;
 }
 
 bool LinearScan::is_virtual_interval(const Interval* i) {
-  return i->reg_num() >= LIR_OprDesc::vreg_base;
+	return i->reg_num() >= LIR_OprDesc::vreg_base;
 }
 
 bool LinearScan::is_precolored_cpu_interval(const Interval* i) {
-  return i->reg_num() < LinearScan::nof_cpu_regs;
+	return i->reg_num() < LinearScan::nof_cpu_regs;
 }
 
 bool LinearScan::is_virtual_cpu_interval(const Interval* i) {
-  return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() != T_FLOAT && i->type() != T_DOUBLE);
+	return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() != T_FLOAT && i->type() != T_DOUBLE);
 }
 
 bool LinearScan::is_precolored_fpu_interval(const Interval* i) {
-  return i->reg_num() >= LinearScan::nof_cpu_regs && i->reg_num() < LinearScan::nof_regs;
+	return i->reg_num() >= LinearScan::nof_cpu_regs && i->reg_num() < LinearScan::nof_regs;
 }
 
 bool LinearScan::is_virtual_fpu_interval(const Interval* i) {
-  return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE);
+	return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE);
 }
 
 bool LinearScan::is_in_fpu_register(const Interval* i) {
-  // fixed intervals not needed for FPU stack allocation
-  return i->reg_num() >= nof_regs && pd_first_fpu_reg <= i->assigned_reg() && i->assigned_reg() <= pd_last_fpu_reg;
+	// fixed intervals not needed for FPU stack allocation
+	return i->reg_num() >= nof_regs && pd_first_fpu_reg <= i->assigned_reg() && i->assigned_reg() <= pd_last_fpu_reg;
 }
 
 bool LinearScan::is_oop_interval(const Interval* i) {
-  // fixed intervals never contain oops
-  return i->reg_num() >= nof_regs && i->type() == T_OBJECT;
+	// fixed intervals never contain oops
+	return i->reg_num() >= nof_regs && i->type() == T_OBJECT;
 }
 
 
@@ -199,94 +200,94 @@
 
 // compute next unused stack index that can be used for spilling
 int LinearScan::allocate_spill_slot(bool double_word) {
-  int spill_slot;
-  if (double_word) {
-    if ((_max_spills & 1) == 1) {
-      // alignment of double-word values
-      // the hole because of the alignment is filled with the next single-word value
-      assert(_unused_spill_slot == -1, "wasting a spill slot");
-      _unused_spill_slot = _max_spills;
-      _max_spills++;
-    }
-    spill_slot = _max_spills;
-    _max_spills += 2;
-
-  } else if (_unused_spill_slot != -1) {
-    // re-use hole that was the result of a previous double-word alignment
-    spill_slot = _unused_spill_slot;
-    _unused_spill_slot = -1;
-
-  } else {
-    spill_slot = _max_spills;
-    _max_spills++;
-  }
-
-  int result = spill_slot + LinearScan::nof_regs + frame_map()->argcount();
-
-  // the class OopMapValue uses only 11 bits for storing the name of the
-  // oop location. So a stack slot bigger than 2^11 leads to an overflow
-  // that is not reported in product builds. Prevent this by checking the
-  // spill slot here (altough this value and the later used location name
-  // are slightly different)
-  if (result > 2000) {
-    bailout("too many stack slots used");
-  }
-
-  return result;
+	int spill_slot;
+	if (double_word) {
+		if ((_max_spills & 1) == 1) {
+			// alignment of double-word values
+			// the hole because of the alignment is filled with the next single-word value
+			assert(_unused_spill_slot == -1, "wasting a spill slot");
+			_unused_spill_slot = _max_spills;
+			_max_spills++;
+		}
+		spill_slot = _max_spills;
+		_max_spills += 2;
+
+	} else if (_unused_spill_slot != -1) {
+		// re-use hole that was the result of a previous double-word alignment
+		spill_slot = _unused_spill_slot;
+		_unused_spill_slot = -1;
+
+	} else {
+		spill_slot = _max_spills;
+		_max_spills++;
+	}
+
+	int result = spill_slot + LinearScan::nof_regs + frame_map()->argcount();
+
+	// the class OopMapValue uses only 11 bits for storing the name of the
+	// oop location. So a stack slot bigger than 2^11 leads to an overflow
+	// that is not reported in product builds. Prevent this by checking the
+	// spill slot here (although this value and the later used location name
+	// are slightly different)
+	if (result > 2000) {
+		bailout("too many stack slots used");
+	}
+
+	return result;
 }
 
 void LinearScan::assign_spill_slot(Interval* it) {
-  // assign the canonical spill slot of the parent (if a part of the interval
-  // is already spilled) or allocate a new spill slot
-  if (it->canonical_spill_slot() >= 0) {
-    it->assign_reg(it->canonical_spill_slot());
-  } else {
-    int spill = allocate_spill_slot(type2spill_size[it->type()] == 2);
-    it->set_canonical_spill_slot(spill);
-    it->assign_reg(spill);
-  }
+	// assign the canonical spill slot of the parent (if a part of the interval
+	// is already spilled) or allocate a new spill slot
+	if (it->canonical_spill_slot() >= 0) {
+		it->assign_reg(it->canonical_spill_slot());
+	} else {
+		int spill = allocate_spill_slot(type2spill_size[it->type()] == 2);
+		it->set_canonical_spill_slot(spill);
+		it->assign_reg(spill);
+	}
 }
 
 void LinearScan::propagate_spill_slots() {
-  if (!frame_map()->finalize_frame(max_spills())) {
-    bailout("frame too large");
-  }
+	if (!frame_map()->finalize_frame(max_spills())) {
+		bailout("frame too large");
+	}
 }
 
 // create a new interval with a predefined reg_num
 // (only used for parent intervals that are created during the building phase)
 Interval* LinearScan::create_interval(int reg_num) {
-  assert(_intervals.at(reg_num) == NULL, "overwriting exisiting interval");
-
-  Interval* interval = new Interval(reg_num);
-  _intervals.at_put(reg_num, interval);
-
-  // assign register number for precolored intervals
-  if (reg_num < LIR_OprDesc::vreg_base) {
-    interval->assign_reg(reg_num);
-  }
-  return interval;
+	assert(_intervals.at(reg_num) == NULL, "overwriting existing interval");
+
+	Interval* interval = new Interval(reg_num);
+	_intervals.at_put(reg_num, interval);
+
+	// assign register number for precolored intervals
+	if (reg_num < LIR_OprDesc::vreg_base) {
+		interval->assign_reg(reg_num);
+	}
+	return interval;
 }
 
 // assign a new reg_num to the interval and append it to the list of intervals
 // (only used for child intervals that are created during register allocation)
 void LinearScan::append_interval(Interval* it) {
-  it->set_reg_num(_intervals.length());
-  _intervals.append(it);
-  _new_intervals_from_allocation->append(it);
+	it->set_reg_num(_intervals.length());
+	_intervals.append(it);
+	_new_intervals_from_allocation->append(it);
 }
 
 // copy the vreg-flags if an interval is split
 void LinearScan::copy_register_flags(Interval* from, Interval* to) {
-  if (gen()->is_vreg_flag_set(from->reg_num(), LIRGenerator::byte_reg)) {
-    gen()->set_vreg_flag(to->reg_num(), LIRGenerator::byte_reg);
-  }
-  if (gen()->is_vreg_flag_set(from->reg_num(), LIRGenerator::callee_saved)) {
-    gen()->set_vreg_flag(to->reg_num(), LIRGenerator::callee_saved);
-  }
-
-  // Note: do not copy the must_start_in_memory flag because it is not necessary for child
-  //       intervals (only the very beginning of the interval must be in memory)
+	if (gen()->is_vreg_flag_set(from->reg_num(), LIRGenerator::byte_reg)) {
+		gen()->set_vreg_flag(to->reg_num(), LIRGenerator::byte_reg);
+	}
+	if (gen()->is_vreg_flag_set(from->reg_num(), LIRGenerator::callee_saved)) {
+		gen()->set_vreg_flag(to->reg_num(), LIRGenerator::callee_saved);
+	}
+
+	// Note: do not copy the must_start_in_memory flag because it is not necessary for child
+	//       intervals (only the very beginning of the interval must be in memory)
 }
 
 
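The c1_LinearScan.cpp hunks above are whitespace re-indentation of unchanged logic (plus the added copyright line). For reference, the spill-slot allocator re-indented here keeps double-word slots even-aligned and recycles the one-slot hole that the alignment leaves behind. A compact stand-alone model of that policy (illustrative only; the register-count and argument-count biasing and the 2000-slot bailout are omitted):

    struct SpillSlots {
      int max_spills = 0;
      int unused_slot = -1;
      int allocate(bool double_word) {
        int slot;
        if (double_word) {
          if (max_spills & 1) unused_slot = max_spills++;  // pad to even, remember the hole
          slot = max_spills;
          max_spills += 2;
        } else if (unused_slot != -1) {
          slot = unused_slot;                              // reuse an earlier alignment hole
          unused_slot = -1;
        } else {
          slot = max_spills++;
        }
        return slot;
      }
    };
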
@@ -295,170 +296,170 @@
 
 // called during building of intervals
 void LinearScan::change_spill_definition_pos(Interval* interval, int def_pos) {
-  assert(interval->is_split_parent(), "can only be called for split parents");
-
-  switch (interval->spill_state()) {
-    case noDefinitionFound:
-      assert(interval->spill_definition_pos() == -1, "must no be set before");
-      interval->set_spill_definition_pos(def_pos);
-      interval->set_spill_state(oneDefinitionFound);
-      break;
-
-    case oneDefinitionFound:
-      assert(def_pos <= interval->spill_definition_pos(), "positions are processed in reverse order when intervals are created");
-      if (def_pos < interval->spill_definition_pos() - 2) {
-        // second definition found, so no spill optimization possible for this interval
-        interval->set_spill_state(noOptimization);
-      } else {
-        // two consecutive definitions (because of two-operand LIR form)
-        assert(block_of_op_with_id(def_pos) == block_of_op_with_id(interval->spill_definition_pos()), "block must be equal");
-      }
-      break;
-
-    case noOptimization:
-      // nothing to do
-      break;
-
-    default:
-      assert(false, "other states not allowed at this time");
-  }
+	assert(interval->is_split_parent(), "can only be called for split parents");
+
+	switch (interval->spill_state()) {
+		case noDefinitionFound:
+			assert(interval->spill_definition_pos() == -1, "must not be set before");
+			interval->set_spill_definition_pos(def_pos);
+			interval->set_spill_state(oneDefinitionFound);
+			break;
+
+		case oneDefinitionFound:
+			assert(def_pos <= interval->spill_definition_pos(), "positions are processed in reverse order when intervals are created");
+			if (def_pos < interval->spill_definition_pos() - 2) {
+				// second definition found, so no spill optimization possible for this interval
+				interval->set_spill_state(noOptimization);
+			} else {
+				// two consecutive definitions (because of two-operand LIR form)
+				assert(block_of_op_with_id(def_pos) == block_of_op_with_id(interval->spill_definition_pos()), "block must be equal");
+			}
+			break;
+
+		case noOptimization:
+			// nothing to do
+			break;
+
+		default:
+			assert(false, "other states not allowed at this time");
+	}
 }
 
 // called during register allocation
 void LinearScan::change_spill_state(Interval* interval, int spill_pos) {
-  switch (interval->spill_state()) {
-    case oneDefinitionFound: {
-      int def_loop_depth = block_of_op_with_id(interval->spill_definition_pos())->loop_depth();
-      int spill_loop_depth = block_of_op_with_id(spill_pos)->loop_depth();
-
-      if (def_loop_depth < spill_loop_depth) {
-        // the loop depth of the spilling position is higher then the loop depth
-        // at the definition of the interval -> move write to memory out of loop
-        // by storing at definitin of the interval
-        interval->set_spill_state(storeAtDefinition);
-      } else {
-        // the interval is currently spilled only once, so for now there is no
-        // reason to store the interval at the definition
-        interval->set_spill_state(oneMoveInserted);
-      }
-      break;
-    }
-
-    case oneMoveInserted: {
-      // the interval is spilled more then once, so it is better to store it to
-      // memory at the definition
-      interval->set_spill_state(storeAtDefinition);
-      break;
-    }
-
-    case storeAtDefinition:
-    case startInMemory:
-    case noOptimization:
-    case noDefinitionFound:
-      // nothing to do
-      break;
-
-    default:
-      assert(false, "other states not allowed at this time");
-  }
+	switch (interval->spill_state()) {
+		case oneDefinitionFound: {
+						 int def_loop_depth = block_of_op_with_id(interval->spill_definition_pos())->loop_depth();
+						 int spill_loop_depth = block_of_op_with_id(spill_pos)->loop_depth();
+
+						 if (def_loop_depth < spill_loop_depth) {
+							 // the loop depth of the spilling position is higher than the loop depth
+							 // at the definition of the interval -> move write to memory out of loop
+							 // by storing at definition of the interval
+							 interval->set_spill_state(storeAtDefinition);
+						 } else {
+							 // the interval is currently spilled only once, so for now there is no
+							 // reason to store the interval at the definition
+							 interval->set_spill_state(oneMoveInserted);
+						 }
+						 break;
+					 }
+
+		case oneMoveInserted: {
+					      // the interval is spilled more than once, so it is better to store it to
+					      // memory at the definition
+					      interval->set_spill_state(storeAtDefinition);
+					      break;
+				      }
+
+		case storeAtDefinition:
+		case startInMemory:
+		case noOptimization:
+		case noDefinitionFound:
+				      // nothing to do
+				      break;
+
+		default:
+				      assert(false, "other states not allowed at this time");
+	}
 }
 
 
 bool LinearScan::must_store_at_definition(const Interval* i) {
-  return i->is_split_parent() && i->spill_state() == storeAtDefinition;
+	return i->is_split_parent() && i->spill_state() == storeAtDefinition;
 }
 
 // called once before asignment of register numbers
 void LinearScan::eliminate_spill_moves() {
-  TIME_LINEAR_SCAN(timer_eliminate_spill_moves);
-  TRACE_LINEAR_SCAN(3, tty->print_cr("***** Eliminating unnecessary spill moves"));
-
-  // collect all intervals that must be stored after their definion.
-  // the list is sorted by Interval::spill_definition_pos
-  Interval* interval;
-  Interval* temp_list;
-  create_unhandled_lists(&interval, &temp_list, must_store_at_definition, NULL);
+	TIME_LINEAR_SCAN(timer_eliminate_spill_moves);
+	TRACE_LINEAR_SCAN(3, tty->print_cr("***** Eliminating unnecessary spill moves"));
+
+	// collect all intervals that must be stored after their definition.
+	// the list is sorted by Interval::spill_definition_pos
+	Interval* interval;
+	Interval* temp_list;
+	create_unhandled_lists(&interval, &temp_list, must_store_at_definition, NULL);
 
 #ifdef ASSERT
-  Interval* prev = NULL;
-  Interval* temp = interval;
-  while (temp != Interval::end()) {
-    assert(temp->spill_definition_pos() > 0, "invalid spill definition pos");
-    if (prev != NULL) {
-      assert(temp->from() >= prev->from(), "intervals not sorted");
-      assert(temp->spill_definition_pos() >= prev->spill_definition_pos(), "when intervals are sorted by from, then they must also be sorted by spill_definition_pos");
-    }
-
-    assert(temp->canonical_spill_slot() >= LinearScan::nof_regs, "interval has no spill slot assigned");
-    assert(temp->spill_definition_pos() >= temp->from(), "invalid order");
-    assert(temp->spill_definition_pos() <= temp->from() + 2, "only intervals defined once at their start-pos can be optimized");
-
-    TRACE_LINEAR_SCAN(4, tty->print_cr("interval %d (from %d to %d) must be stored at %d", temp->reg_num(), temp->from(), temp->to(), temp->spill_definition_pos()));
-
-    temp = temp->next();
-  }
+	Interval* prev = NULL;
+	Interval* temp = interval;
+	while (temp != Interval::end()) {
+		assert(temp->spill_definition_pos() > 0, "invalid spill definition pos");
+		if (prev != NULL) {
+			assert(temp->from() >= prev->from(), "intervals not sorted");
+			assert(temp->spill_definition_pos() >= prev->spill_definition_pos(), "when intervals are sorted by from, then they must also be sorted by spill_definition_pos");
+		}
+
+		assert(temp->canonical_spill_slot() >= LinearScan::nof_regs, "interval has no spill slot assigned");
+		assert(temp->spill_definition_pos() >= temp->from(), "invalid order");
+		assert(temp->spill_definition_pos() <= temp->from() + 2, "only intervals defined once at their start-pos can be optimized");
+
+		TRACE_LINEAR_SCAN(4, tty->print_cr("interval %d (from %d to %d) must be stored at %d", temp->reg_num(), temp->from(), temp->to(), temp->spill_definition_pos()));
+
+		temp = temp->next();
+	}
 #endif
 
-  LIR_InsertionBuffer insertion_buffer;
-  int num_blocks = block_count();
-  for (int i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-    LIR_OpList* instructions = block->lir()->instructions_list();
-    int         num_inst = instructions->length();
-    bool        has_new = false;
-
-    // iterate all instructions of the block. skip the first because it is always a label
-    for (int j = 1; j < num_inst; j++) {
-      LIR_Op* op = instructions->at(j);
-      int op_id = op->id();
-
-      if (op_id == -1) {
-        // remove move from register to stack if the stack slot is guaranteed to be correct.
-        // only moves that have been inserted by LinearScan can be removed.
-        assert(op->code() == lir_move, "only moves can have a op_id of -1");
-        assert(op->as_Op1() != NULL, "move must be LIR_Op1");
-        assert(op->as_Op1()->result_opr()->is_virtual(), "LinearScan inserts only moves to virtual registers");
-
-        LIR_Op1* op1 = (LIR_Op1*)op;
-        Interval* interval = interval_at(op1->result_opr()->vreg_number());
-
-        if (interval->assigned_reg() >= LinearScan::nof_regs && interval->always_in_memory()) {
-          // move target is a stack slot that is always correct, so eliminate instruction
-          TRACE_LINEAR_SCAN(4, tty->print_cr("eliminating move from interval %d to %d", op1->in_opr()->vreg_number(), op1->result_opr()->vreg_number()));
-          instructions->at_put(j, NULL); // NULL-instructions are deleted by assign_reg_num
-        }
-
-      } else {
-        // insert move from register to stack just after the beginning of the interval
-        assert(interval == Interval::end() || interval->spill_definition_pos() >= op_id, "invalid order");
-        assert(interval == Interval::end() || (interval->is_split_parent() && interval->spill_state() == storeAtDefinition), "invalid interval");
-
-        while (interval != Interval::end() && interval->spill_definition_pos() == op_id) {
-          if (!has_new) {
-            // prepare insertion buffer (appended when all instructions of the block are processed)
-            insertion_buffer.init(block->lir());
-            has_new = true;
-          }
-
-          LIR_Opr from_opr = operand_for_interval(interval);
-          LIR_Opr to_opr = canonical_spill_opr(interval);
-          assert(from_opr->is_fixed_cpu() || from_opr->is_fixed_fpu(), "from operand must be a register");
-          assert(to_opr->is_stack(), "to operand must be a stack slot");
-
-          insertion_buffer.move(j, from_opr, to_opr);
-          TRACE_LINEAR_SCAN(4, tty->print_cr("inserting move after definition of interval %d to stack slot %d at op_id %d", interval->reg_num(), interval->canonical_spill_slot() - LinearScan::nof_regs, op_id));
-
-          interval = interval->next();
-        }
-      }
-    } // end of instruction iteration
-
-    if (has_new) {
-      block->lir()->append(&insertion_buffer);
-    }
-  } // end of block iteration
-
-  assert(interval == Interval::end(), "missed an interval");
+	LIR_InsertionBuffer insertion_buffer;
+	int num_blocks = block_count();
+	for (int i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+		LIR_OpList* instructions = block->lir()->instructions_list();
+		int         num_inst = instructions->length();
+		bool        has_new = false;
+
+		// iterate all instructions of the block. skip the first because it is always a label
+		for (int j = 1; j < num_inst; j++) {
+			LIR_Op* op = instructions->at(j);
+			int op_id = op->id();
+
+			if (op_id == -1) {
+				// remove move from register to stack if the stack slot is guaranteed to be correct.
+				// only moves that have been inserted by LinearScan can be removed.
+				assert(op->code() == lir_move, "only moves can have a op_id of -1");
+				assert(op->as_Op1() != NULL, "move must be LIR_Op1");
+				assert(op->as_Op1()->result_opr()->is_virtual(), "LinearScan inserts only moves to virtual registers");
+
+				LIR_Op1* op1 = (LIR_Op1*)op;
+				Interval* interval = interval_at(op1->result_opr()->vreg_number());
+
+				if (interval->assigned_reg() >= LinearScan::nof_regs && interval->always_in_memory()) {
+					// move target is a stack slot that is always correct, so eliminate instruction
+					TRACE_LINEAR_SCAN(4, tty->print_cr("eliminating move from interval %d to %d", op1->in_opr()->vreg_number(), op1->result_opr()->vreg_number()));
+					instructions->at_put(j, NULL); // NULL-instructions are deleted by assign_reg_num
+				}
+
+			} else {
+				// insert move from register to stack just after the beginning of the interval
+				assert(interval == Interval::end() || interval->spill_definition_pos() >= op_id, "invalid order");
+				assert(interval == Interval::end() || (interval->is_split_parent() && interval->spill_state() == storeAtDefinition), "invalid interval");
+
+				while (interval != Interval::end() && interval->spill_definition_pos() == op_id) {
+					if (!has_new) {
+						// prepare insertion buffer (appended when all instructions of the block are processed)
+						insertion_buffer.init(block->lir());
+						has_new = true;
+					}
+
+					LIR_Opr from_opr = operand_for_interval(interval);
+					LIR_Opr to_opr = canonical_spill_opr(interval);
+					assert(from_opr->is_fixed_cpu() || from_opr->is_fixed_fpu(), "from operand must be a register");
+					assert(to_opr->is_stack(), "to operand must be a stack slot");
+
+					insertion_buffer.move(j, from_opr, to_opr);
+					TRACE_LINEAR_SCAN(4, tty->print_cr("inserting move after definition of interval %d to stack slot %d at op_id %d", interval->reg_num(), interval->canonical_spill_slot() - LinearScan::nof_regs, op_id));
+
+					interval = interval->next();
+				}
+			}
+		} // end of instruction iteration
+
+		if (has_new) {
+			block->lir()->append(&insertion_buffer);
+		}
+	} // end of block iteration
+
+	assert(interval == Interval::end(), "missed an interval");
 }
 
 
@@ -466,52 +467,52 @@
 // Compute depth-first and linear scan block orders, and number LIR_Op nodes for linear scan.
 
 void LinearScan::number_instructions() {
-  {
-    // dummy-timer to measure the cost of the timer itself
-    // (this time is then subtracted from all other timers to get the real value)
-    TIME_LINEAR_SCAN(timer_do_nothing);
-  }
-  TIME_LINEAR_SCAN(timer_number_instructions);
-
-  // Assign IDs to LIR nodes and build a mapping, lir_ops, from ID to LIR_Op node.
-  int num_blocks = block_count();
-  int num_instructions = 0;
-  int i;
-  for (i = 0; i < num_blocks; i++) {
-    num_instructions += block_at(i)->lir()->instructions_list()->length();
-  }
-
-  // initialize with correct length
-  _lir_ops = LIR_OpArray(num_instructions);
-  _block_of_op = BlockBeginArray(num_instructions);
-
-  int op_id = 0;
-  int idx = 0;
-
-  for (i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-    block->set_first_lir_instruction_id(op_id);
-    LIR_OpList* instructions = block->lir()->instructions_list();
-
-    int num_inst = instructions->length();
-    for (int j = 0; j < num_inst; j++) {
-      LIR_Op* op = instructions->at(j);
-      op->set_id(op_id);
-
-      _lir_ops.at_put(idx, op);
-      _block_of_op.at_put(idx, block);
-      assert(lir_op_with_id(op_id) == op, "must match");
-
-      idx++;
-      op_id += 2; // numbering of lir_ops by two
-    }
-    block->set_last_lir_instruction_id(op_id - 2);
-  }
-  assert(idx == num_instructions, "must match");
-  assert(idx * 2 == op_id, "must match");
-
-  _has_call = BitMap(num_instructions); _has_call.clear();
-  _has_info = BitMap(num_instructions); _has_info.clear();
+	{
+		// dummy-timer to measure the cost of the timer itself
+		// (this time is then subtracted from all other timers to get the real value)
+		TIME_LINEAR_SCAN(timer_do_nothing);
+	}
+	TIME_LINEAR_SCAN(timer_number_instructions);
+
+	// Assign IDs to LIR nodes and build a mapping, lir_ops, from ID to LIR_Op node.
+	int num_blocks = block_count();
+	int num_instructions = 0;
+	int i;
+	for (i = 0; i < num_blocks; i++) {
+		num_instructions += block_at(i)->lir()->instructions_list()->length();
+	}
+
+	// initialize with correct length
+	_lir_ops = LIR_OpArray(num_instructions);
+	_block_of_op = BlockBeginArray(num_instructions);
+
+	int op_id = 0;
+	int idx = 0;
+
+	for (i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+		block->set_first_lir_instruction_id(op_id);
+		LIR_OpList* instructions = block->lir()->instructions_list();
+
+		int num_inst = instructions->length();
+		for (int j = 0; j < num_inst; j++) {
+			LIR_Op* op = instructions->at(j);
+			op->set_id(op_id);
+
+			_lir_ops.at_put(idx, op);
+			_block_of_op.at_put(idx, block);
+			assert(lir_op_with_id(op_id) == op, "must match");
+
+			idx++;
+			op_id += 2; // numbering of lir_ops by two
+		}
+		block->set_last_lir_instruction_id(op_id - 2);
+	}
+	assert(idx == num_instructions, "must match");
+	assert(idx * 2 == op_id, "must match");
+
+	_has_call = BitMap(num_instructions); _has_call.clear();
+	_has_info = BitMap(num_instructions); _has_info.clear();
 }
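
number_instructions() hands out even op_ids only, so the index-based side tables (_lir_ops, _block_of_op, _has_call, _has_info) can be addressed with op_id >> 1 while the odd positions stay free for moves the allocator inserts later. A small sketch of that mapping, assuming nothing beyond the arithmetic itself:

// Sketch of the op_id <-> array-index mapping implied by the numbering above:
// even op_ids only, so odd positions stay free for later-inserted moves.
#include <cassert>

inline int index_of(int op_id) { return op_id >> 1; }  // used like _lir_ops indexing
inline int op_id_of(int index) { return index << 1; }

int main() {
  for (int idx = 0; idx < 8; idx++) {
    int op_id = op_id_of(idx);
    assert((op_id & 1) == 0);          // every real instruction gets an even id
    assert(index_of(op_id) == idx);    // op_id >> 1 recovers the array index
  }
  return 0;
}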
 
 
@@ -519,201 +520,201 @@
 // (sets live_gen and live_kill for each block)
 
 void LinearScan::set_live_gen_kill(Value value, LIR_Op* op, BitMap& live_gen, BitMap& live_kill) {
-  LIR_Opr opr = value->operand();
-  Constant* con = value->as_Constant();
-
-  // check some asumptions about debug information
-  assert(!value->type()->is_illegal(), "if this local is used by the interpreter it shouldn't be of indeterminate type");
-  assert(con == NULL || opr->is_virtual() || opr->is_constant() || opr->is_illegal(), "asumption: Constant instructions have only constant operands");
-  assert(con != NULL || opr->is_virtual(), "asumption: non-Constant instructions have only virtual operands");
-
-  if ((con == NULL || con->is_pinned()) && opr->is_register()) {
-    assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-    int reg = opr->vreg_number();
-    if (!live_kill.at(reg)) {
-      live_gen.set_bit(reg);
-      TRACE_LINEAR_SCAN(4, tty->print_cr("  Setting live_gen for value %c%d, LIR op_id %d, register number %d", value->type()->tchar(), value->id(), op->id(), reg));
-    }
-  }
+	LIR_Opr opr = value->operand();
+	Constant* con = value->as_Constant();
+
+	// check some assumptions about debug information
+	assert(!value->type()->is_illegal(), "if this local is used by the interpreter it shouldn't be of indeterminate type");
+	assert(con == NULL || opr->is_virtual() || opr->is_constant() || opr->is_illegal(), "assumption: Constant instructions have only constant operands");
+	assert(con != NULL || opr->is_virtual(), "assumption: non-Constant instructions have only virtual operands");
+
+	if ((con == NULL || con->is_pinned()) && opr->is_register()) {
+		assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+		int reg = opr->vreg_number();
+		if (!live_kill.at(reg)) {
+			live_gen.set_bit(reg);
+			TRACE_LINEAR_SCAN(4, tty->print_cr("  Setting live_gen for value %c%d, LIR op_id %d, register number %d", value->type()->tchar(), value->id(), op->id(), reg));
+		}
+	}
 }
 
 
 void LinearScan::compute_local_live_sets() {
-  TIME_LINEAR_SCAN(timer_compute_local_live_sets);
-
-  int  num_blocks = block_count();
-  int  live_size = live_set_size();
-  bool local_has_fpu_registers = false;
-  int  local_num_calls = 0;
-  LIR_OpVisitState visitor;
-
-  BitMap2D local_interval_in_loop = BitMap2D(_num_virtual_regs, num_loops());
-  local_interval_in_loop.clear();
-
-  // iterate all blocks
-  for (int i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-
-    BitMap live_gen(live_size);  live_gen.clear();
-    BitMap live_kill(live_size); live_kill.clear();
-
-    if (block->is_set(BlockBegin::exception_entry_flag)) {
-      // Phi functions at the begin of an exception handler are
-      // implicitly defined (= killed) at the beginning of the block.
-      for_each_phi_fun(block, phi,
-        live_kill.set_bit(phi->operand()->vreg_number())
-      );
-    }
-
-    LIR_OpList* instructions = block->lir()->instructions_list();
-    int num_inst = instructions->length();
-
-    // iterate all instructions of the block. skip the first because it is always a label
-    assert(visitor.no_operands(instructions->at(0)), "first operation must always be a label");
-    for (int j = 1; j < num_inst; j++) {
-      LIR_Op* op = instructions->at(j);
-
-      // visit operation to collect all operands
-      visitor.visit(op);
-
-      if (visitor.has_call()) {
-        _has_call.set_bit(op->id() >> 1);
-        local_num_calls++;
-      }
-      if (visitor.info_count() > 0) {
-        _has_info.set_bit(op->id() >> 1);
-      }
-
-      // iterate input operands of instruction
-      int k, n, reg;
-      n = visitor.opr_count(LIR_OpVisitState::inputMode);
-      for (k = 0; k < n; k++) {
-        LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::inputMode, k);
-        assert(opr->is_register(), "visitor should only return register operands");
-
-        if (opr->is_virtual_register()) {
-          assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-          reg = opr->vreg_number();
-          if (!live_kill.at(reg)) {
-            live_gen.set_bit(reg);
-            TRACE_LINEAR_SCAN(4, tty->print_cr("  Setting live_gen for register %d at instruction %d", reg, op->id()));
-          }
-          if (block->loop_index() >= 0) {
-            local_interval_in_loop.set_bit(reg, block->loop_index());
-          }
-          local_has_fpu_registers = local_has_fpu_registers || opr->is_virtual_fpu();
-        }
+	TIME_LINEAR_SCAN(timer_compute_local_live_sets);
+
+	int  num_blocks = block_count();
+	int  live_size = live_set_size();
+	bool local_has_fpu_registers = false;
+	int  local_num_calls = 0;
+	LIR_OpVisitState visitor;
+
+	BitMap2D local_interval_in_loop = BitMap2D(_num_virtual_regs, num_loops());
+	local_interval_in_loop.clear();
+
+	// iterate all blocks
+	for (int i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+
+		BitMap live_gen(live_size);  live_gen.clear();
+		BitMap live_kill(live_size); live_kill.clear();
+
+		if (block->is_set(BlockBegin::exception_entry_flag)) {
+			// Phi functions at the beginning of an exception handler are
+			// implicitly defined (= killed) at the beginning of the block.
+			for_each_phi_fun(block, phi,
+				live_kill.set_bit(phi->operand()->vreg_number())
+			);
+		}
+
+		LIR_OpList* instructions = block->lir()->instructions_list();
+		int num_inst = instructions->length();
+
+		// iterate all instructions of the block. skip the first because it is always a label
+		assert(visitor.no_operands(instructions->at(0)), "first operation must always be a label");
+		for (int j = 1; j < num_inst; j++) {
+			LIR_Op* op = instructions->at(j);
+
+			// visit operation to collect all operands
+			visitor.visit(op);
+
+			if (visitor.has_call()) {
+				_has_call.set_bit(op->id() >> 1);
+				local_num_calls++;
+			}
+			if (visitor.info_count() > 0) {
+				_has_info.set_bit(op->id() >> 1);
+			}
+
+			// iterate input operands of instruction
+			int k, n, reg;
+			n = visitor.opr_count(LIR_OpVisitState::inputMode);
+			for (k = 0; k < n; k++) {
+				LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::inputMode, k);
+				assert(opr->is_register(), "visitor should only return register operands");
+
+				if (opr->is_virtual_register()) {
+					assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+					reg = opr->vreg_number();
+					if (!live_kill.at(reg)) {
+						live_gen.set_bit(reg);
+						TRACE_LINEAR_SCAN(4, tty->print_cr("  Setting live_gen for register %d at instruction %d", reg, op->id()));
+					}
+					if (block->loop_index() >= 0) {
+						local_interval_in_loop.set_bit(reg, block->loop_index());
+					}
+					local_has_fpu_registers = local_has_fpu_registers || opr->is_virtual_fpu();
+				}
 
 #ifdef ASSERT
-        // fixed intervals are never live at block boundaries, so
-        // they need not be processed in live sets.
-        // this is checked by these assertions to be sure about it.
-        // the entry block may have incoming values in registers, which is ok.
-        if (!opr->is_virtual_register() && block != ir()->start()) {
-          reg = reg_num(opr);
-          if (is_processed_reg_num(reg)) {
-            assert(live_kill.at(reg), "using fixed register that is not defined in this block");
-          }
-          reg = reg_numHi(opr);
-          if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
-            assert(live_kill.at(reg), "using fixed register that is not defined in this block");
-          }
-        }
+				// fixed intervals are never live at block boundaries, so
+				// they need not be processed in live sets.
+				// this is checked by these assertions to be sure about it.
+				// the entry block may have incoming values in registers, which is ok.
+				if (!opr->is_virtual_register() && block != ir()->start()) {
+					reg = reg_num(opr);
+					if (is_processed_reg_num(reg)) {
+						assert(live_kill.at(reg), "using fixed register that is not defined in this block");
+					}
+					reg = reg_numHi(opr);
+					if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
+						assert(live_kill.at(reg), "using fixed register that is not defined in this block");
+					}
+				}
 #endif
-      }
-
-      // Add uses of live locals from interpreter's point of view for proper debug information generation
-      n = visitor.info_count();
-      for (k = 0; k < n; k++) {
-        CodeEmitInfo* info = visitor.info_at(k);
-        ValueStack* stack = info->stack();
-        for_each_state_value(stack, value,
-          set_live_gen_kill(value, op, live_gen, live_kill)
-        );
-      }
-
-      // iterate temp operands of instruction
-      n = visitor.opr_count(LIR_OpVisitState::tempMode);
-      for (k = 0; k < n; k++) {
-        LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::tempMode, k);
-        assert(opr->is_register(), "visitor should only return register operands");
-
-        if (opr->is_virtual_register()) {
-          assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-          reg = opr->vreg_number();
-          live_kill.set_bit(reg);
-          if (block->loop_index() >= 0) {
-            local_interval_in_loop.set_bit(reg, block->loop_index());
-          }
-          local_has_fpu_registers = local_has_fpu_registers || opr->is_virtual_fpu();
-        }
+			}
+
+			// Add uses of live locals from interpreter's point of view for proper debug information generation
+			n = visitor.info_count();
+			for (k = 0; k < n; k++) {
+				CodeEmitInfo* info = visitor.info_at(k);
+				ValueStack* stack = info->stack();
+				for_each_state_value(stack, value,
+						set_live_gen_kill(value, op, live_gen, live_kill)
+						);
+			}
+
+			// iterate temp operands of instruction
+			n = visitor.opr_count(LIR_OpVisitState::tempMode);
+			for (k = 0; k < n; k++) {
+				LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::tempMode, k);
+				assert(opr->is_register(), "visitor should only return register operands");
+
+				if (opr->is_virtual_register()) {
+					assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+					reg = opr->vreg_number();
+					live_kill.set_bit(reg);
+					if (block->loop_index() >= 0) {
+						local_interval_in_loop.set_bit(reg, block->loop_index());
+					}
+					local_has_fpu_registers = local_has_fpu_registers || opr->is_virtual_fpu();
+				}
 
 #ifdef ASSERT
-        // fixed intervals are never live at block boundaries, so
-        // they need not be processed in live sets
-        // process them only in debug mode so that this can be checked
-        if (!opr->is_virtual_register()) {
-          reg = reg_num(opr);
-          if (is_processed_reg_num(reg)) {
-            live_kill.set_bit(reg_num(opr));
-          }
-          reg = reg_numHi(opr);
-          if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
-            live_kill.set_bit(reg);
-          }
-        }
+				// fixed intervals are never live at block boundaries, so
+				// they need not be processed in live sets
+				// process them only in debug mode so that this can be checked
+				if (!opr->is_virtual_register()) {
+					reg = reg_num(opr);
+					if (is_processed_reg_num(reg)) {
+						live_kill.set_bit(reg_num(opr));
+					}
+					reg = reg_numHi(opr);
+					if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
+						live_kill.set_bit(reg);
+					}
+				}
 #endif
-      }
-
-      // iterate output operands of instruction
-      n = visitor.opr_count(LIR_OpVisitState::outputMode);
-      for (k = 0; k < n; k++) {
-        LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::outputMode, k);
-        assert(opr->is_register(), "visitor should only return register operands");
-
-        if (opr->is_virtual_register()) {
-          assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-          reg = opr->vreg_number();
-          live_kill.set_bit(reg);
-          if (block->loop_index() >= 0) {
-            local_interval_in_loop.set_bit(reg, block->loop_index());
-          }
-          local_has_fpu_registers = local_has_fpu_registers || opr->is_virtual_fpu();
-        }
+			}
+
+			// iterate output operands of instruction
+			n = visitor.opr_count(LIR_OpVisitState::outputMode);
+			for (k = 0; k < n; k++) {
+				LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::outputMode, k);
+				assert(opr->is_register(), "visitor should only return register operands");
+
+				if (opr->is_virtual_register()) {
+					assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+					reg = opr->vreg_number();
+					live_kill.set_bit(reg);
+					if (block->loop_index() >= 0) {
+						local_interval_in_loop.set_bit(reg, block->loop_index());
+					}
+					local_has_fpu_registers = local_has_fpu_registers || opr->is_virtual_fpu();
+				}
 
 #ifdef ASSERT
-        // fixed intervals are never live at block boundaries, so
-        // they need not be processed in live sets
-        // process them only in debug mode so that this can be checked
-        if (!opr->is_virtual_register()) {
-          reg = reg_num(opr);
-          if (is_processed_reg_num(reg)) {
-            live_kill.set_bit(reg_num(opr));
-          }
-          reg = reg_numHi(opr);
-          if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
-            live_kill.set_bit(reg);
-          }
-        }
+				// fixed intervals are never live at block boundaries, so
+				// they need not be processed in live sets
+				// process them only in debug mode so that this can be checked
+				if (!opr->is_virtual_register()) {
+					reg = reg_num(opr);
+					if (is_processed_reg_num(reg)) {
+						live_kill.set_bit(reg_num(opr));
+					}
+					reg = reg_numHi(opr);
+					if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
+						live_kill.set_bit(reg);
+					}
+				}
 #endif
-      }
-    } // end of instruction iteration
-
-    block->set_live_gen (live_gen);
-    block->set_live_kill(live_kill);
-    block->set_live_in  (BitMap(live_size)); block->live_in().clear();
-    block->set_live_out (BitMap(live_size)); block->live_out().clear();
-
-    TRACE_LINEAR_SCAN(4, tty->print("live_gen  B%d ", block->block_id()); print_bitmap(block->live_gen()));
-    TRACE_LINEAR_SCAN(4, tty->print("live_kill B%d ", block->block_id()); print_bitmap(block->live_kill()));
-  } // end of block iteration
-
-  // propagate local calculated information into LinearScan object
-  _has_fpu_registers = local_has_fpu_registers;
-  compilation()->set_has_fpu_code(local_has_fpu_registers);
-
-  _num_calls = local_num_calls;
-  _interval_in_loop = local_interval_in_loop;
+			}
+		} // end of instruction iteration
+
+		block->set_live_gen (live_gen);
+		block->set_live_kill(live_kill);
+		block->set_live_in  (BitMap(live_size)); block->live_in().clear();
+		block->set_live_out (BitMap(live_size)); block->live_out().clear();
+
+		TRACE_LINEAR_SCAN(4, tty->print("live_gen  B%d ", block->block_id()); print_bitmap(block->live_gen()));
+		TRACE_LINEAR_SCAN(4, tty->print("live_kill B%d ", block->block_id()); print_bitmap(block->live_kill()));
+	} // end of block iteration
+
+	// propagate local calculated information into LinearScan object
+	_has_fpu_registers = local_has_fpu_registers;
+	compilation()->set_has_fpu_code(local_has_fpu_registers);
+
+	_num_calls = local_num_calls;
+	_interval_in_loop = local_interval_in_loop;
 }
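
compute_local_live_sets() applies the classic gen/kill rule per block: an operand enters live_gen only if it is read before any definition in the same block, and every definition enters live_kill. A toy version of that rule on a hypothetical three-instruction block (simplified encoding, not LIR):

// Toy gen/kill computation matching the rule used above: a use enters live_gen
// only if the register was not already killed earlier in the same block.
// Hypothetical simplified instruction encoding, not LIR.
#include <cstdio>
#include <set>
#include <vector>

struct Inst { std::vector<int> uses; std::vector<int> defs; };

int main() {
  std::vector<Inst> block = {
    { {},  {1} },   // v1 = ...
    { {1}, {2} },   // v2 = f(v1)  (v1 already killed above, so not added to gen)
    { {3}, {1} },   // v1 = g(v3)  (v3 used before any def, so it goes into gen)
  };

  std::set<int> live_gen, live_kill;
  for (const Inst& inst : block) {
    for (int u : inst.uses)
      if (!live_kill.count(u)) live_gen.insert(u);
    for (int d : inst.defs)
      live_kill.insert(d);
  }
  for (int r : live_gen)  std::printf("gen  v%d\n", r);   // prints: gen  v3
  for (int r : live_kill) std::printf("kill v%d\n", r);   // prints: kill v1, kill v2
  return 0;
}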
 
 
@@ -721,129 +722,129 @@
 // (sets live_in and live_out for each block)
 
 void LinearScan::compute_global_live_sets() {
-  TIME_LINEAR_SCAN(timer_compute_global_live_sets);
-
-  int  num_blocks = block_count();
-  bool change_occurred;
-  bool change_occurred_in_block;
-  int  iteration_count = 0;
-  BitMap live_out(live_set_size()); live_out.clear(); // scratch set for calculations
-
-  // Perform a backward dataflow analysis to compute live_out and live_in for each block.
-  // The loop is executed until a fixpoint is reached (no changes in an iteration)
-  // Exception handlers must be processed because not all live values are
-  // present in the state array, e.g. because of global value numbering
-  do {
-    change_occurred = false;
-
-    // iterate all blocks in reverse order
-    for (int i = num_blocks - 1; i >= 0; i--) {
-      BlockBegin* block = block_at(i);
-
-      change_occurred_in_block = false;
-
-      // live_out(block) is the union of live_in(sux), for successors sux of block
-      int n = block->number_of_sux();
-      int e = block->number_of_exception_handlers();
-      if (n + e > 0) {
-        // block has successors
-        if (n > 0) {
-          live_out.set_from(block->sux_at(0)->live_in());
-          for (int j = 1; j < n; j++) {
-            live_out.set_union(block->sux_at(j)->live_in());
-          }
-        } else {
-          live_out.clear();
-        }
-        for (int j = 0; j < e; j++) {
-          live_out.set_union(block->exception_handler_at(j)->live_in());
-        }
-
-        if (!block->live_out().is_same(live_out)) {
-          // A change occurred.  Swap the old and new live out sets to avoid copying.
-          BitMap temp = block->live_out();
-          block->set_live_out(live_out);
-          live_out = temp;
-
-          change_occurred = true;
-          change_occurred_in_block = true;
-        }
-      }
-
-      if (iteration_count == 0 || change_occurred_in_block) {
-        // live_in(block) is the union of live_gen(block) with (live_out(block) & !live_kill(block))
-        // note: live_in has to be computed only in first iteration or if live_out has changed!
-        BitMap live_in = block->live_in();
-        live_in.set_from(block->live_out());
-        live_in.set_difference(block->live_kill());
-        live_in.set_union(block->live_gen());
-      }
+	TIME_LINEAR_SCAN(timer_compute_global_live_sets);
+
+	int  num_blocks = block_count();
+	bool change_occurred;
+	bool change_occurred_in_block;
+	int  iteration_count = 0;
+	BitMap live_out(live_set_size()); live_out.clear(); // scratch set for calculations
+
+	// Perform a backward dataflow analysis to compute live_out and live_in for each block.
+	// The loop is executed until a fixpoint is reached (no changes in an iteration)
+	// Exception handlers must be processed because not all live values are
+	// present in the state array, e.g. because of global value numbering
+	do {
+		change_occurred = false;
+
+		// iterate all blocks in reverse order
+		for (int i = num_blocks - 1; i >= 0; i--) {
+			BlockBegin* block = block_at(i);
+
+			change_occurred_in_block = false;
+
+			// live_out(block) is the union of live_in(sux), for successors sux of block
+			int n = block->number_of_sux();
+			int e = block->number_of_exception_handlers();
+			if (n + e > 0) {
+				// block has successors
+				if (n > 0) {
+					live_out.set_from(block->sux_at(0)->live_in());
+					for (int j = 1; j < n; j++) {
+						live_out.set_union(block->sux_at(j)->live_in());
+					}
+				} else {
+					live_out.clear();
+				}
+				for (int j = 0; j < e; j++) {
+					live_out.set_union(block->exception_handler_at(j)->live_in());
+				}
+
+				if (!block->live_out().is_same(live_out)) {
+					// A change occurred.  Swap the old and new live out sets to avoid copying.
+					BitMap temp = block->live_out();
+					block->set_live_out(live_out);
+					live_out = temp;
+
+					change_occurred = true;
+					change_occurred_in_block = true;
+				}
+			}
+
+			if (iteration_count == 0 || change_occurred_in_block) {
+				// live_in(block) is the union of live_gen(block) with (live_out(block) & !live_kill(block))
+				// note: live_in has to be computed only in first iteration or if live_out has changed!
+				BitMap live_in = block->live_in();
+				live_in.set_from(block->live_out());
+				live_in.set_difference(block->live_kill());
+				live_in.set_union(block->live_gen());
+			}
 
 #ifndef PRODUCT
-      if (TraceLinearScanLevel >= 4) {
-        char c = ' ';
-        if (iteration_count == 0 || change_occurred_in_block) {
-          c = '*';
-        }
-        tty->print("(%d) live_in%c  B%d ", iteration_count, c, block->block_id()); print_bitmap(block->live_in());
-        tty->print("(%d) live_out%c B%d ", iteration_count, c, block->block_id()); print_bitmap(block->live_out());
-      }
+			if (TraceLinearScanLevel >= 4) {
+				char c = ' ';
+				if (iteration_count == 0 || change_occurred_in_block) {
+					c = '*';
+				}
+				tty->print("(%d) live_in%c  B%d ", iteration_count, c, block->block_id()); print_bitmap(block->live_in());
+				tty->print("(%d) live_out%c B%d ", iteration_count, c, block->block_id()); print_bitmap(block->live_out());
+			}
 #endif
-    }
-    iteration_count++;
-
-    if (change_occurred && iteration_count > 50) {
-      BAILOUT("too many iterations in compute_global_live_sets");
-    }
-  } while (change_occurred);
+		}
+		iteration_count++;
+
+		if (change_occurred && iteration_count > 50) {
+			BAILOUT("too many iterations in compute_global_live_sets");
+		}
+	} while (change_occurred);
 
 
 #ifdef ASSERT
-  // check that fixed intervals are not live at block boundaries
-  // (live set must be empty at fixed intervals)
-  for (int i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-    for (int j = 0; j < LIR_OprDesc::vreg_base; j++) {
-      assert(block->live_in().at(j)  == false, "live_in  set of fixed register must be empty");
-      assert(block->live_out().at(j) == false, "live_out set of fixed register must be empty");
-      assert(block->live_gen().at(j) == false, "live_gen set of fixed register must be empty");
-    }
-  }
+	// check that fixed intervals are not live at block boundaries
+	// (live set must be empty at fixed intervals)
+	for (int i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+		for (int j = 0; j < LIR_OprDesc::vreg_base; j++) {
+			assert(block->live_in().at(j)  == false, "live_in  set of fixed register must be empty");
+			assert(block->live_out().at(j) == false, "live_out set of fixed register must be empty");
+			assert(block->live_gen().at(j) == false, "live_gen set of fixed register must be empty");
+		}
+	}
 #endif
 
-  // check that the live_in set of the first block is empty
-  BitMap live_in_args(ir()->start()->live_in().size());
-  live_in_args.clear();
-  if (!ir()->start()->live_in().is_same(live_in_args)) {
+	// check that the live_in set of the first block is empty
+	BitMap live_in_args(ir()->start()->live_in().size());
+	live_in_args.clear();
+	if (!ir()->start()->live_in().is_same(live_in_args)) {
 #ifdef ASSERT
-    tty->print_cr("Error: live_in set of first block must be empty (when this fails, virtual registers are used before they are defined)");
-    tty->print_cr("affected registers:");
-    print_bitmap(ir()->start()->live_in());
-
-    // print some additional information to simplify debugging
-    for (unsigned int i = 0; i < ir()->start()->live_in().size(); i++) {
-      if (ir()->start()->live_in().at(i)) {
-        Instruction* instr = gen()->instruction_for_vreg(i);
-        tty->print_cr("* vreg %d (HIR instruction %c%d)", i, instr == NULL ? ' ' : instr->type()->tchar(), instr == NULL ? 0 : instr->id());
-
-        for (int j = 0; j < num_blocks; j++) {
-          BlockBegin* block = block_at(j);
-          if (block->live_gen().at(i)) {
-            tty->print_cr("  used in block B%d", block->block_id());
-          }
-          if (block->live_kill().at(i)) {
-            tty->print_cr("  defined in block B%d", block->block_id());
-          }
-        }
-      }
-    }
+		tty->print_cr("Error: live_in set of first block must be empty (when this fails, virtual registers are used before they are defined)");
+		tty->print_cr("affected registers:");
+		print_bitmap(ir()->start()->live_in());
+
+		// print some additional information to simplify debugging
+		for (unsigned int i = 0; i < ir()->start()->live_in().size(); i++) {
+			if (ir()->start()->live_in().at(i)) {
+				Instruction* instr = gen()->instruction_for_vreg(i);
+				tty->print_cr("* vreg %d (HIR instruction %c%d)", i, instr == NULL ? ' ' : instr->type()->tchar(), instr == NULL ? 0 : instr->id());
+
+				for (int j = 0; j < num_blocks; j++) {
+					BlockBegin* block = block_at(j);
+					if (block->live_gen().at(i)) {
+						tty->print_cr("  used in block B%d", block->block_id());
+					}
+					if (block->live_kill().at(i)) {
+						tty->print_cr("  defined in block B%d", block->block_id());
+					}
+				}
+			}
+		}
 
 #endif
-    // when this fails, virtual registers are used before they are defined.
-    assert(false, "live_in set of first block must be empty");
-    // bailout of if this occurs in product mode.
-    bailout("live_in set of first block not empty");
-  }
+		// when this fails, virtual registers are used before they are defined.
+		assert(false, "live_in set of first block must be empty");
+		// bail out if this occurs in product mode.
+		bailout("live_in set of first block not empty");
+	}
 }
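
compute_global_live_sets() is a textbook backward dataflow fixpoint: live_out(B) is the union of live_in over B's successors and exception handlers, and live_in(B) = live_gen(B) | (live_out(B) & !live_kill(B)), iterated in reverse block order until nothing changes. A self-contained sketch on a hypothetical two-block CFG, with std::bitset standing in for BitMap:

// Backward dataflow sketch of the fixpoint computed above, on a hypothetical
// two-block CFG (B0 -> B1) with std::bitset standing in for BitMap.
#include <bitset>
#include <cstdio>
#include <vector>

constexpr int kRegs = 8;
struct Block {
  std::vector<int> sux;                 // successor block indices
  std::bitset<kRegs> gen, kill, in, out;
};

int main() {
  std::vector<Block> blocks(2);
  blocks[0].sux = {1};  blocks[0].kill.set(1);   // B0 defines v1
  blocks[1].gen.set(1); blocks[1].gen.set(2);    // B1 uses v1 and v2

  bool changed;
  do {
    changed = false;
    for (int i = (int)blocks.size() - 1; i >= 0; i--) {  // reverse block order
      Block& b = blocks[i];
      std::bitset<kRegs> out;
      for (int s : b.sux) out |= blocks[s].in;           // out = union of sux live_in
      std::bitset<kRegs> in = b.gen | (out & ~b.kill);   // in = gen | (out & !kill)
      if (in != b.in || out != b.out) { b.in = in; b.out = out; changed = true; }
    }
  } while (changed);

  // Only v2 is live into B0, because B0 itself defines v1.
  std::printf("live_in(B0) = %s\n", blocks[0].in.to_string().c_str());
  return 0;
}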
 
 
@@ -851,151 +852,151 @@
 // (fills the list _intervals)
 
 void LinearScan::add_use(Value value, int from, int to, IntervalUseKind use_kind) {
-  assert(!value->type()->is_illegal(), "if this value is used by the interpreter it shouldn't be of indeterminate type");
-  LIR_Opr opr = value->operand();
-  Constant* con = value->as_Constant();
-
-  if ((con == NULL || con->is_pinned()) && opr->is_register()) {
-    assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-    add_use(opr, from, to, use_kind);
-  }
+	assert(!value->type()->is_illegal(), "if this value is used by the interpreter it shouldn't be of indeterminate type");
+	LIR_Opr opr = value->operand();
+	Constant* con = value->as_Constant();
+
+	if ((con == NULL || con->is_pinned()) && opr->is_register()) {
+		assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+		add_use(opr, from, to, use_kind);
+	}
 }
 
 
 void LinearScan::add_def(LIR_Opr opr, int def_pos, IntervalUseKind use_kind) {
-  TRACE_LINEAR_SCAN(2, tty->print(" def "); opr->print(tty); tty->print_cr(" def_pos %d (%d)", def_pos, use_kind));
-  assert(opr->is_register(), "should not be called otherwise");
-
-  if (opr->is_virtual_register()) {
-    assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-    add_def(opr->vreg_number(), def_pos, use_kind, opr->type_register());
-
-  } else {
-    int reg = reg_num(opr);
-    if (is_processed_reg_num(reg)) {
-      add_def(reg, def_pos, use_kind, opr->type_register());
-    }
-    reg = reg_numHi(opr);
-    if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
-      add_def(reg, def_pos, use_kind, opr->type_register());
-    }
-  }
+	TRACE_LINEAR_SCAN(2, tty->print(" def "); opr->print(tty); tty->print_cr(" def_pos %d (%d)", def_pos, use_kind));
+	assert(opr->is_register(), "should not be called otherwise");
+
+	if (opr->is_virtual_register()) {
+		assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+		add_def(opr->vreg_number(), def_pos, use_kind, opr->type_register());
+
+	} else {
+		int reg = reg_num(opr);
+		if (is_processed_reg_num(reg)) {
+			add_def(reg, def_pos, use_kind, opr->type_register());
+		}
+		reg = reg_numHi(opr);
+		if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
+			add_def(reg, def_pos, use_kind, opr->type_register());
+		}
+	}
 }
 
 void LinearScan::add_use(LIR_Opr opr, int from, int to, IntervalUseKind use_kind) {
-  TRACE_LINEAR_SCAN(2, tty->print(" use "); opr->print(tty); tty->print_cr(" from %d to %d (%d)", from, to, use_kind));
-  assert(opr->is_register(), "should not be called otherwise");
-
-  if (opr->is_virtual_register()) {
-    assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-    add_use(opr->vreg_number(), from, to, use_kind, opr->type_register());
-
-  } else {
-    int reg = reg_num(opr);
-    if (is_processed_reg_num(reg)) {
-      add_use(reg, from, to, use_kind, opr->type_register());
-    }
-    reg = reg_numHi(opr);
-    if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
-      add_use(reg, from, to, use_kind, opr->type_register());
-    }
-  }
+	TRACE_LINEAR_SCAN(2, tty->print(" use "); opr->print(tty); tty->print_cr(" from %d to %d (%d)", from, to, use_kind));
+	assert(opr->is_register(), "should not be called otherwise");
+
+	if (opr->is_virtual_register()) {
+		assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+		add_use(opr->vreg_number(), from, to, use_kind, opr->type_register());
+
+	} else {
+		int reg = reg_num(opr);
+		if (is_processed_reg_num(reg)) {
+			add_use(reg, from, to, use_kind, opr->type_register());
+		}
+		reg = reg_numHi(opr);
+		if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
+			add_use(reg, from, to, use_kind, opr->type_register());
+		}
+	}
 }
 
 void LinearScan::add_temp(LIR_Opr opr, int temp_pos, IntervalUseKind use_kind) {
-  TRACE_LINEAR_SCAN(2, tty->print(" temp "); opr->print(tty); tty->print_cr(" temp_pos %d (%d)", temp_pos, use_kind));
-  assert(opr->is_register(), "should not be called otherwise");
-
-  if (opr->is_virtual_register()) {
-    assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
-    add_temp(opr->vreg_number(), temp_pos, use_kind, opr->type_register());
-
-  } else {
-    int reg = reg_num(opr);
-    if (is_processed_reg_num(reg)) {
-      add_temp(reg, temp_pos, use_kind, opr->type_register());
-    }
-    reg = reg_numHi(opr);
-    if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
-      add_temp(reg, temp_pos, use_kind, opr->type_register());
-    }
-  }
+	TRACE_LINEAR_SCAN(2, tty->print(" temp "); opr->print(tty); tty->print_cr(" temp_pos %d (%d)", temp_pos, use_kind));
+	assert(opr->is_register(), "should not be called otherwise");
+
+	if (opr->is_virtual_register()) {
+		assert(reg_num(opr) == opr->vreg_number() && !is_valid_reg_num(reg_numHi(opr)), "invalid optimization below");
+		add_temp(opr->vreg_number(), temp_pos, use_kind, opr->type_register());
+
+	} else {
+		int reg = reg_num(opr);
+		if (is_processed_reg_num(reg)) {
+			add_temp(reg, temp_pos, use_kind, opr->type_register());
+		}
+		reg = reg_numHi(opr);
+		if (is_valid_reg_num(reg) && is_processed_reg_num(reg)) {
+			add_temp(reg, temp_pos, use_kind, opr->type_register());
+		}
+	}
 }
 
 
 void LinearScan::add_def(int reg_num, int def_pos, IntervalUseKind use_kind, BasicType type) {
-  Interval* interval = interval_at(reg_num);
-  if (interval != NULL) {
-    assert(interval->reg_num() == reg_num, "wrong interval");
-
-    if (type != T_ILLEGAL) {
-      interval->set_type(type);
-    }
-
-    Range* r = interval->first();
-    if (r->from() <= def_pos) {
-      // Update the starting point (when a range is first created for a use, its
-      // start is the beginning of the current block until a def is encountered.)
-      r->set_from(def_pos);
-      interval->add_use_pos(def_pos, use_kind);
-
-    } else {
-      // Dead value - make vacuous interval
-      // also add use_kind for dead intervals
-      interval->add_range(def_pos, def_pos + 1);
-      interval->add_use_pos(def_pos, use_kind);
-      TRACE_LINEAR_SCAN(2, tty->print_cr("Warning: def of reg %d at %d occurs without use", reg_num, def_pos));
-    }
-
-  } else {
-    // Dead value - make vacuous interval
-    // also add use_kind for dead intervals
-    interval = create_interval(reg_num);
-    if (type != T_ILLEGAL) {
-      interval->set_type(type);
-    }
-
-    interval->add_range(def_pos, def_pos + 1);
-    interval->add_use_pos(def_pos, use_kind);
-    TRACE_LINEAR_SCAN(2, tty->print_cr("Warning: dead value %d at %d in live intervals", reg_num, def_pos));
-  }
-
-  change_spill_definition_pos(interval, def_pos);
-  if (use_kind == noUse && interval->spill_state() <= startInMemory) {
-        // detection of method-parameters and roundfp-results
-        // TODO: move this directly to position where use-kind is computed
-    interval->set_spill_state(startInMemory);
-  }
+	Interval* interval = interval_at(reg_num);
+	if (interval != NULL) {
+		assert(interval->reg_num() == reg_num, "wrong interval");
+
+		if (type != T_ILLEGAL) {
+			interval->set_type(type);
+		}
+
+		Range* r = interval->first();
+		if (r->from() <= def_pos) {
+			// Update the starting point (when a range is first created for a use, its
+			// start is the beginning of the current block until a def is encountered.)
+			r->set_from(def_pos);
+			interval->add_use_pos(def_pos, use_kind);
+
+		} else {
+			// Dead value - make vacuous interval
+			// also add use_kind for dead intervals
+			interval->add_range(def_pos, def_pos + 1);
+			interval->add_use_pos(def_pos, use_kind);
+			TRACE_LINEAR_SCAN(2, tty->print_cr("Warning: def of reg %d at %d occurs without use", reg_num, def_pos));
+		}
+
+	} else {
+		// Dead value - make vacuous interval
+		// also add use_kind for dead intervals
+		interval = create_interval(reg_num);
+		if (type != T_ILLEGAL) {
+			interval->set_type(type);
+		}
+
+		interval->add_range(def_pos, def_pos + 1);
+		interval->add_use_pos(def_pos, use_kind);
+		TRACE_LINEAR_SCAN(2, tty->print_cr("Warning: dead value %d at %d in live intervals", reg_num, def_pos));
+	}
+
+	change_spill_definition_pos(interval, def_pos);
+	if (use_kind == noUse && interval->spill_state() <= startInMemory) {
+		// detection of method-parameters and roundfp-results
+		// TODO: move this directly to position where use-kind is computed
+		interval->set_spill_state(startInMemory);
+	}
 }
 
 void LinearScan::add_use(int reg_num, int from, int to, IntervalUseKind use_kind, BasicType type) {
-  Interval* interval = interval_at(reg_num);
-  if (interval == NULL) {
-    interval = create_interval(reg_num);
-  }
-  assert(interval->reg_num() == reg_num, "wrong interval");
-
-  if (type != T_ILLEGAL) {
-    interval->set_type(type);
-  }
-
-  interval->add_range(from, to);
-  interval->add_use_pos(to, use_kind);
+	Interval* interval = interval_at(reg_num);
+	if (interval == NULL) {
+		interval = create_interval(reg_num);
+	}
+	assert(interval->reg_num() == reg_num, "wrong interval");
+
+	if (type != T_ILLEGAL) {
+		interval->set_type(type);
+	}
+
+	interval->add_range(from, to);
+	interval->add_use_pos(to, use_kind);
 }
 
 void LinearScan::add_temp(int reg_num, int temp_pos, IntervalUseKind use_kind, BasicType type) {
-  Interval* interval = interval_at(reg_num);
-  if (interval == NULL) {
-    interval = create_interval(reg_num);
-  }
-  assert(interval->reg_num() == reg_num, "wrong interval");
-
-  if (type != T_ILLEGAL) {
-    interval->set_type(type);
-  }
-
-  interval->add_range(temp_pos, temp_pos + 1);
-  interval->add_use_pos(temp_pos, use_kind);
+	Interval* interval = interval_at(reg_num);
+	if (interval == NULL) {
+		interval = create_interval(reg_num);
+	}
+	assert(interval->reg_num() == reg_num, "wrong interval");
+
+	if (type != T_ILLEGAL) {
+		interval->set_type(type);
+	}
+
+	interval->add_range(temp_pos, temp_pos + 1);
+	interval->add_use_pos(temp_pos, use_kind);
 }
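
Because build_intervals() walks each block backwards, add_use() first opens a range that provisionally starts at the block entry, and the defining add_def() later tightens that start via r->set_from(def_pos). A minimal sketch of this provisional-range-then-shrink behaviour, with a hypothetical simplified Interval:

// Sketch of how a range created for a use is later tightened by its def,
// mirroring add_use()/add_def() above (hypothetical simplified Interval).
#include <cstdio>
#include <utility>
#include <vector>

struct Interval {
  std::vector<std::pair<int, int>> ranges;           // [from, to) pairs, newest first
  void add_range(int from, int to) { ranges.insert(ranges.begin(), {from, to}); }
  void set_first_from(int from)    { ranges.front().first = from; }
};

int main() {
  Interval v;
  // Walking a block [10, 20) backwards: a use at op_id 18 provisionally opens
  // a range from the block start to the use position.
  v.add_range(/*block_from=*/10, /*use_pos=*/18);
  // The def at op_id 14 is seen later in the backward walk and replaces the
  // provisional start, just like r->set_from(def_pos) above.
  v.set_first_from(14);

  for (const auto& r : v.ranges)
    std::printf("[%d, %d)\n", r.first, r.second);     // prints: [14, 18)
  return 0;
}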
 
 
@@ -1003,607 +1004,607 @@
 // if the functions return shouldHaveRegister and the interval is spilled,
 // it is not reloaded to a register.
 IntervalUseKind LinearScan::use_kind_of_output_operand(LIR_Op* op, LIR_Opr opr) {
-  if (op->code() == lir_move) {
-    assert(op->as_Op1() != NULL, "lir_move must be LIR_Op1");
-    LIR_Op1* move = (LIR_Op1*)op;
-    LIR_Opr res = move->result_opr();
-    bool result_in_memory = res->is_virtual() && gen()->is_vreg_flag_set(res->vreg_number(), LIRGenerator::must_start_in_memory);
-
-    if (result_in_memory) {
-      // Begin of an interval with must_start_in_memory set.
-      // This interval will always get a stack slot first, so return noUse.
-      return noUse;
-
-    } else if (move->in_opr()->is_stack()) {
-      // method argument (condition must be equal to handle_method_arguments)
-      return noUse;
-
-    } else if (move->in_opr()->is_register() && move->result_opr()->is_register()) {
-      // Move from register to register
-      if (block_of_op_with_id(op->id())->is_set(BlockBegin::osr_entry_flag)) {
-        // special handling of phi-function moves inside osr-entry blocks
-        // input operand must have a register instead of output operand (leads to better register allocation)
-        return shouldHaveRegister;
-      }
-    }
-  }
-
-  if (opr->is_virtual() &&
-      gen()->is_vreg_flag_set(opr->vreg_number(), LIRGenerator::must_start_in_memory)) {
-    // result is a stack-slot, so prevent immediate reloading
-    return noUse;
-  }
-
-  // all other operands require a register
-  return mustHaveRegister;
+	if (op->code() == lir_move) {
+		assert(op->as_Op1() != NULL, "lir_move must be LIR_Op1");
+		LIR_Op1* move = (LIR_Op1*)op;
+		LIR_Opr res = move->result_opr();
+		bool result_in_memory = res->is_virtual() && gen()->is_vreg_flag_set(res->vreg_number(), LIRGenerator::must_start_in_memory);
+
+		if (result_in_memory) {
+			// Beginning of an interval with must_start_in_memory set.
+			// This interval will always get a stack slot first, so return noUse.
+			return noUse;
+
+		} else if (move->in_opr()->is_stack()) {
+			// method argument (condition must be equal to handle_method_arguments)
+			return noUse;
+
+		} else if (move->in_opr()->is_register() && move->result_opr()->is_register()) {
+			// Move from register to register
+			if (block_of_op_with_id(op->id())->is_set(BlockBegin::osr_entry_flag)) {
+				// special handling of phi-function moves inside osr-entry blocks
+				// the input operand, not the output operand, must get a register (leads to better register allocation)
+				return shouldHaveRegister;
+			}
+		}
+	}
+
+	if (opr->is_virtual() &&
+			gen()->is_vreg_flag_set(opr->vreg_number(), LIRGenerator::must_start_in_memory)) {
+		// result is a stack-slot, so prevent immediate reloading
+		return noUse;
+	}
+
+	// all other operands require a register
+	return mustHaveRegister;
 }
 
 IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) {
-  if (op->code() == lir_move) {
-    assert(op->as_Op1() != NULL, "lir_move must be LIR_Op1");
-    LIR_Op1* move = (LIR_Op1*)op;
-    LIR_Opr res = move->result_opr();
-    bool result_in_memory = res->is_virtual() && gen()->is_vreg_flag_set(res->vreg_number(), LIRGenerator::must_start_in_memory);
-
-    if (result_in_memory) {
-      // Move to an interval with must_start_in_memory set.
-      // To avoid moves from stack to stack (not allowed) force the input operand to a register
-      return mustHaveRegister;
-
-    } else if (move->in_opr()->is_register() && move->result_opr()->is_register()) {
-      // Move from register to register
-      if (block_of_op_with_id(op->id())->is_set(BlockBegin::osr_entry_flag)) {
-        // special handling of phi-function moves inside osr-entry blocks
-        // input operand must have a register instead of output operand (leads to better register allocation)
-        return mustHaveRegister;
-      }
-
-      // The input operand is not forced to a register (moves from stack to register are allowed),
-      // but it is faster if the input operand is in a register
-      return shouldHaveRegister;
-    }
-  }
+	if (op->code() == lir_move) {
+		assert(op->as_Op1() != NULL, "lir_move must be LIR_Op1");
+		LIR_Op1* move = (LIR_Op1*)op;
+		LIR_Opr res = move->result_opr();
+		bool result_in_memory = res->is_virtual() && gen()->is_vreg_flag_set(res->vreg_number(), LIRGenerator::must_start_in_memory);
+
+		if (result_in_memory) {
+			// Move to an interval with must_start_in_memory set.
+			// To avoid moves from stack to stack (not allowed), force the input operand into a register
+			return mustHaveRegister;
+
+		} else if (move->in_opr()->is_register() && move->result_opr()->is_register()) {
+			// Move from register to register
+			if (block_of_op_with_id(op->id())->is_set(BlockBegin::osr_entry_flag)) {
+				// special handling of phi-function moves inside osr-entry blocks
+				// the input operand, not the output operand, must get a register (leads to better register allocation)
+				return mustHaveRegister;
+			}
+
+			// The input operand is not forced to a register (moves from stack to register are allowed),
+			// but it is faster if the input operand is in a register
+			return shouldHaveRegister;
+		}
+	}
 
 
 #ifdef X86
-  if (op->code() == lir_cmove) {
-    // conditional moves can handle stack operands
-    assert(op->result_opr()->is_register(), "result must always be in a register");
-    return shouldHaveRegister;
-  }
-
-  // optimizations for second input operand of arithmehtic operations on Intel
-  // this operand is allowed to be on the stack in some cases
-  BasicType opr_type = opr->type_register();
-  if (opr_type == T_FLOAT || opr_type == T_DOUBLE) {
-    if ((UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2) {
-      // SSE float instruction (T_DOUBLE only supported with SSE2)
-      switch (op->code()) {
-        case lir_cmp:
-        case lir_add:
-        case lir_sub:
-        case lir_mul:
-        case lir_div:
-        {
-          assert(op->as_Op2() != NULL, "must be LIR_Op2");
-          LIR_Op2* op2 = (LIR_Op2*)op;
-          if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) {
-            assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register");
-            return shouldHaveRegister;
-          }
-        }
-      }
-    } else {
-      // FPU stack float instruction
-      switch (op->code()) {
-        case lir_add:
-        case lir_sub:
-        case lir_mul:
-        case lir_div:
-        {
-          assert(op->as_Op2() != NULL, "must be LIR_Op2");
-          LIR_Op2* op2 = (LIR_Op2*)op;
-          if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) {
-            assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register");
-            return shouldHaveRegister;
-          }
-        }
-      }
-    }
-
-  } else if (opr_type != T_LONG) {
-    // integer instruction (note: long operands must always be in register)
-    switch (op->code()) {
-      case lir_cmp:
-      case lir_add:
-      case lir_sub:
-      case lir_logic_and:
-      case lir_logic_or:
-      case lir_logic_xor:
-      {
-        assert(op->as_Op2() != NULL, "must be LIR_Op2");
-        LIR_Op2* op2 = (LIR_Op2*)op;
-        if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) {
-          assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register");
-          return shouldHaveRegister;
-        }
-      }
-    }
-  }
+	if (op->code() == lir_cmove) {
+		// conditional moves can handle stack operands
+		assert(op->result_opr()->is_register(), "result must always be in a register");
+		return shouldHaveRegister;
+	}
+
+	// optimizations for second input operand of arithmetic operations on Intel
+	// this operand is allowed to be on the stack in some cases
+	BasicType opr_type = opr->type_register();
+	if (opr_type == T_FLOAT || opr_type == T_DOUBLE) {
+		if ((UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2) {
+			// SSE float instruction (T_DOUBLE only supported with SSE2)
+			switch (op->code()) {
+				case lir_cmp:
+				case lir_add:
+				case lir_sub:
+				case lir_mul:
+				case lir_div:
+					{
+						assert(op->as_Op2() != NULL, "must be LIR_Op2");
+						LIR_Op2* op2 = (LIR_Op2*)op;
+						if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) {
+							assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register");
+							return shouldHaveRegister;
+						}
+					}
+			}
+		} else {
+			// FPU stack float instruction
+			switch (op->code()) {
+				case lir_add:
+				case lir_sub:
+				case lir_mul:
+				case lir_div:
+					{
+						assert(op->as_Op2() != NULL, "must be LIR_Op2");
+						LIR_Op2* op2 = (LIR_Op2*)op;
+						if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) {
+							assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register");
+							return shouldHaveRegister;
+						}
+					}
+			}
+		}
+
+	} else if (opr_type != T_LONG) {
+		// integer instruction (note: long operands must always be in register)
+		switch (op->code()) {
+			case lir_cmp:
+			case lir_add:
+			case lir_sub:
+			case lir_logic_and:
+			case lir_logic_or:
+			case lir_logic_xor:
+				{
+					assert(op->as_Op2() != NULL, "must be LIR_Op2");
+					LIR_Op2* op2 = (LIR_Op2*)op;
+					if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) {
+						assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register");
+						return shouldHaveRegister;
+					}
+				}
+		}
+	}
 #endif // X86
 
-  // all other operands require a register
-  return mustHaveRegister;
+	// all other operands require a register
+	return mustHaveRegister;
 }
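
The two use_kind_* functions grade every operand position as noUse, shouldHaveRegister, or mustHaveRegister; as the comment before use_kind_of_output_operand notes, a spilled interval is reloaded only for mustHaveRegister positions. A toy illustration of that three-way distinction (hypothetical names, not the C1 enum):

// Toy illustration of the three use-kind strengths returned above and how a
// spilled interval is treated for each (hypothetical names, not the C1 enum).
#include <cstdio>

enum UseKind { kNoUse, kShouldHaveRegister, kMustHaveRegister };

// A spilled interval is reloaded only when the use *must* be in a register;
// "should" uses tolerate the stack slot, and noUse positions never reload.
bool needs_reload(UseKind kind, bool interval_is_spilled) {
  return interval_is_spilled && kind == kMustHaveRegister;
}

int main() {
  std::printf("%d %d %d\n",
              needs_reload(kMustHaveRegister,   true),   // 1: reload before the use
              needs_reload(kShouldHaveRegister, true),   // 0: stack operand is acceptable
              needs_reload(kNoUse,              true));  // 0: e.g. a method-argument move
  return 0;
}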
 
 
 void LinearScan::handle_method_arguments(LIR_Op* op) {
-  // special handling for method arguments (moves from stack to virtual register):
-  // the interval gets no register assigned, but the stack slot.
-  // it is split before the first use by the register allocator.
-
-  if (op->code() == lir_move) {
-    assert(op->as_Op1() != NULL, "must be LIR_Op1");
-    LIR_Op1* move = (LIR_Op1*)op;
-
-    if (move->in_opr()->is_stack()) {
+	// special handling for method arguments (moves from stack to virtual register):
+	// the interval is assigned the stack slot rather than a register.
+	// it is split before the first use by the register allocator.
+
+	if (op->code() == lir_move) {
+		assert(op->as_Op1() != NULL, "must be LIR_Op1");
+		LIR_Op1* move = (LIR_Op1*)op;
+
+		if (move->in_opr()->is_stack()) {
 #ifdef ASSERT
-      int arg_size = compilation()->method()->arg_size();
-      LIR_Opr o = move->in_opr();
-      if (o->is_single_stack()) {
-        assert(o->single_stack_ix() >= 0 && o->single_stack_ix() < arg_size, "out of range");
-      } else if (o->is_double_stack()) {
-        assert(o->double_stack_ix() >= 0 && o->double_stack_ix() < arg_size, "out of range");
-      } else {
-        ShouldNotReachHere();
-      }
-
-      assert(move->id() > 0, "invalid id");
-      assert(block_of_op_with_id(move->id())->number_of_preds() == 0, "move from stack must be in first block");
-      assert(move->result_opr()->is_virtual(), "result of move must be a virtual register");
-
-      TRACE_LINEAR_SCAN(4, tty->print_cr("found move from stack slot %d to vreg %d", o->is_single_stack() ? o->single_stack_ix() : o->double_stack_ix(), reg_num(move->result_opr())));
+			int arg_size = compilation()->method()->arg_size();
+			LIR_Opr o = move->in_opr();
+			if (o->is_single_stack()) {
+				assert(o->single_stack_ix() >= 0 && o->single_stack_ix() < arg_size, "out of range");
+			} else if (o->is_double_stack()) {
+				assert(o->double_stack_ix() >= 0 && o->double_stack_ix() < arg_size, "out of range");
+			} else {
+				ShouldNotReachHere();
+			}
+
+			assert(move->id() > 0, "invalid id");
+			assert(block_of_op_with_id(move->id())->number_of_preds() == 0, "move from stack must be in first block");
+			assert(move->result_opr()->is_virtual(), "result of move must be a virtual register");
+
+			TRACE_LINEAR_SCAN(4, tty->print_cr("found move from stack slot %d to vreg %d", o->is_single_stack() ? o->single_stack_ix() : o->double_stack_ix(), reg_num(move->result_opr())));
 #endif
 
-      Interval* interval = interval_at(reg_num(move->result_opr()));
-
-      int stack_slot = LinearScan::nof_regs + (move->in_opr()->is_single_stack() ? move->in_opr()->single_stack_ix() : move->in_opr()->double_stack_ix());
-      interval->set_canonical_spill_slot(stack_slot);
-      interval->assign_reg(stack_slot);
-    }
-  }
+			Interval* interval = interval_at(reg_num(move->result_opr()));
+
+			int stack_slot = LinearScan::nof_regs + (move->in_opr()->is_single_stack() ? move->in_opr()->single_stack_ix() : move->in_opr()->double_stack_ix());
+			interval->set_canonical_spill_slot(stack_slot);
+			interval->assign_reg(stack_slot);
+		}
+	}
 }
 
 void LinearScan::handle_doubleword_moves(LIR_Op* op) {
-  // special handling for doubleword move from memory to register:
-  // in this case the registers of the input address and the result
-  // registers must not overlap -> add a temp range for the input registers
-  if (op->code() == lir_move) {
-    assert(op->as_Op1() != NULL, "must be LIR_Op1");
-    LIR_Op1* move = (LIR_Op1*)op;
-
-    if (move->result_opr()->is_double_cpu() && move->in_opr()->is_pointer()) {
-      LIR_Address* address = move->in_opr()->as_address_ptr();
-      if (address != NULL) {
-        if (address->base()->is_valid()) {
-          add_temp(address->base(), op->id(), noUse);
-        }
-        if (address->index()->is_valid()) {
-          add_temp(address->index(), op->id(), noUse);
-        }
-      }
-    }
-  }
+	// special handling for doubleword move from memory to register:
+	// in this case the registers of the input address and the result
+	// registers must not overlap -> add a temp range for the input registers
+	if (op->code() == lir_move) {
+		assert(op->as_Op1() != NULL, "must be LIR_Op1");
+		LIR_Op1* move = (LIR_Op1*)op;
+
+		if (move->result_opr()->is_double_cpu() && move->in_opr()->is_pointer()) {
+			LIR_Address* address = move->in_opr()->as_address_ptr();
+			if (address != NULL) {
+				if (address->base()->is_valid()) {
+					add_temp(address->base(), op->id(), noUse);
+				}
+				if (address->index()->is_valid()) {
+					add_temp(address->index(), op->id(), noUse);
+				}
+			}
+		}
+	}
 }
 
 void LinearScan::add_register_hints(LIR_Op* op) {
-  switch (op->code()) {
-    case lir_move:      // fall through
-    case lir_convert: {
-      assert(op->as_Op1() != NULL, "lir_move, lir_convert must be LIR_Op1");
-      LIR_Op1* move = (LIR_Op1*)op;
-
-      LIR_Opr move_from = move->in_opr();
-      LIR_Opr move_to = move->result_opr();
-
-      if (move_to->is_register() && move_from->is_register()) {
-        Interval* from = interval_at(reg_num(move_from));
-        Interval* to = interval_at(reg_num(move_to));
-        if (from != NULL && to != NULL) {
-          to->set_register_hint(from);
-          TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", move->id(), from->reg_num(), to->reg_num()));
-        }
-      }
-      break;
-    }
-    case lir_cmove: {
-      assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2");
-      LIR_Op2* cmove = (LIR_Op2*)op;
-
-      LIR_Opr move_from = cmove->in_opr1();
-      LIR_Opr move_to = cmove->result_opr();
-
-      if (move_to->is_register() && move_from->is_register()) {
-        Interval* from = interval_at(reg_num(move_from));
-        Interval* to = interval_at(reg_num(move_to));
-        if (from != NULL && to != NULL) {
-          to->set_register_hint(from);
-          TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num()));
-        }
-      }
-      break;
-    }
-  }
+	switch (op->code()) {
+		case lir_move:      // fall through
+		case lir_convert: {
+			assert(op->as_Op1() != NULL, "lir_move, lir_convert must be LIR_Op1");
+			LIR_Op1* move = (LIR_Op1*)op;
+
+			LIR_Opr move_from = move->in_opr();
+			LIR_Opr move_to = move->result_opr();
+
+			if (move_to->is_register() && move_from->is_register()) {
+				Interval* from = interval_at(reg_num(move_from));
+				Interval* to = interval_at(reg_num(move_to));
+				if (from != NULL && to != NULL) {
+					to->set_register_hint(from);
+					TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", move->id(), from->reg_num(), to->reg_num()));
+				}
+			}
+			break;
+		}
+		case lir_cmove: {
+			assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2");
+			LIR_Op2* cmove = (LIR_Op2*)op;
+
+			LIR_Opr move_from = cmove->in_opr1();
+			LIR_Opr move_to = cmove->result_opr();
+
+			if (move_to->is_register() && move_from->is_register()) {
+				Interval* from = interval_at(reg_num(move_from));
+				Interval* to = interval_at(reg_num(move_to));
+				if (from != NULL && to != NULL) {
+					to->set_register_hint(from);
+					TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num()));
+				}
+			}
+			break;
+		}
+	}
 }
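
add_register_hints() records the source interval of a register-to-register move (or cmove) as a hint on the destination interval, so the allocator can try to give both intervals the same physical register and make the move redundant. A sketch of how such a hint would bias register selection, using a hypothetical pick_register() rather than the real allocation loop:

// Minimal sketch of how a register hint biases assignment, using a
// hypothetical pick_register() rather than the real allocation loop.
#include <array>
#include <cstdio>

struct Ival { int assigned = -1; const Ival* hint = nullptr; };

int pick_register(const Ival& iv, const std::array<bool, 4>& reg_free) {
  // Prefer the hinted interval's register when it is assigned and still free,
  // so the move that produced the hint can later be elided.
  if (iv.hint && iv.hint->assigned >= 0 && reg_free[iv.hint->assigned])
    return iv.hint->assigned;
  for (int r = 0; r < (int)reg_free.size(); r++)
    if (reg_free[r]) return r;
  return -1;                                       // caller would have to spill
}

int main() {
  std::array<bool, 4> reg_free = {true, true, true, true};
  Ival from; from.assigned = 2;    // source of a register-to-register move
  Ival to;   to.hint = &from;      // what set_register_hint(from) records above
  std::printf("to gets r%d\n", pick_register(to, reg_free));   // prints: to gets r2
  return 0;
}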
 
 
 void LinearScan::build_intervals() {
-  TIME_LINEAR_SCAN(timer_build_intervals);
-
-  // initialize interval list with expected number of intervals
-  // (32 is added to have some space for split children without having to resize the list)
-  _intervals = IntervalList(num_virtual_regs() + 32);
-  // initialize all slots that are used by build_intervals
-  _intervals.at_put_grow(num_virtual_regs() - 1, NULL, NULL);
-
-  // create a list with all caller-save registers (cpu, fpu, xmm)
-  // when an instruction is a call, a temp range is created for all these registers
-  int num_caller_save_registers = 0;
-  int caller_save_registers[LinearScan::nof_regs];
-
-  int i;
-  for (i = 0; i < FrameMap::nof_caller_save_cpu_regs; i++) {
-    LIR_Opr opr = FrameMap::caller_save_cpu_reg_at(i);
-    assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
-    assert(reg_numHi(opr) == -1, "missing addition of range for hi-register");
-    caller_save_registers[num_caller_save_registers++] = reg_num(opr);
-  }
-
-  // temp ranges for fpu registers are only created when the method has
-  // virtual fpu operands. Otherwise no allocation for fpu registers is
-  // perfomed and so the temp ranges would be useless
-  if (has_fpu_registers()) {
+	TIME_LINEAR_SCAN(timer_build_intervals);
+
+	// initialize interval list with expected number of intervals
+	// (32 is added to have some space for split children without having to resize the list)
+	_intervals = IntervalList(num_virtual_regs() + 32);
+	// initialize all slots that are used by build_intervals
+	_intervals.at_put_grow(num_virtual_regs() - 1, NULL, NULL);
+
+	// create a list with all caller-save registers (cpu, fpu, xmm)
+	// when an instruction is a call, a temp range is created for all these registers
+	int num_caller_save_registers = 0;
+	int caller_save_registers[LinearScan::nof_regs];
+
+	int i;
+	for (i = 0; i < FrameMap::nof_caller_save_cpu_regs; i++) {
+		LIR_Opr opr = FrameMap::caller_save_cpu_reg_at(i);
+		assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
+		assert(reg_numHi(opr) == -1, "missing addition of range for hi-register");
+		caller_save_registers[num_caller_save_registers++] = reg_num(opr);
+	}
+
+	// temp ranges for fpu registers are only created when the method has
+	// virtual fpu operands. Otherwise no allocation for fpu registers is
+	// performed and so the temp ranges would be useless
+	if (has_fpu_registers()) {
 #ifdef X86
-    if (UseSSE < 2) {
+		if (UseSSE < 2) {
 #endif
-      for (i = 0; i < FrameMap::nof_caller_save_fpu_regs; i++) {
-        LIR_Opr opr = FrameMap::caller_save_fpu_reg_at(i);
-        assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
-        assert(reg_numHi(opr) == -1, "missing addition of range for hi-register");
-        caller_save_registers[num_caller_save_registers++] = reg_num(opr);
-      }
+			for (i = 0; i < FrameMap::nof_caller_save_fpu_regs; i++) {
+				LIR_Opr opr = FrameMap::caller_save_fpu_reg_at(i);
+				assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
+				assert(reg_numHi(opr) == -1, "missing addition of range for hi-register");
+				caller_save_registers[num_caller_save_registers++] = reg_num(opr);
+			}
 #ifdef X86
-    }
-    if (UseSSE > 0) {
-      for (i = 0; i < FrameMap::nof_caller_save_xmm_regs; i++) {
-        LIR_Opr opr = FrameMap::caller_save_xmm_reg_at(i);
-        assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
-        assert(reg_numHi(opr) == -1, "missing addition of range for hi-register");
-        caller_save_registers[num_caller_save_registers++] = reg_num(opr);
-      }
-    }
+		}
+		if (UseSSE > 0) {
+			for (i = 0; i < FrameMap::nof_caller_save_xmm_regs; i++) {
+				LIR_Opr opr = FrameMap::caller_save_xmm_reg_at(i);
+				assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands");
+				assert(reg_numHi(opr) == -1, "missing addition of range for hi-register");
+				caller_save_registers[num_caller_save_registers++] = reg_num(opr);
+			}
+		}
 #endif
-  }
-  assert(num_caller_save_registers <= LinearScan::nof_regs, "out of bounds");
-
-
-  LIR_OpVisitState visitor;
-
-  // iterate all blocks in reverse order
-  for (i = block_count() - 1; i >= 0; i--) {
-    BlockBegin* block = block_at(i);
-    LIR_OpList* instructions = block->lir()->instructions_list();
-    int         block_from =   block->first_lir_instruction_id();
-    int         block_to =     block->last_lir_instruction_id();
-
-    assert(block_from == instructions->at(0)->id(), "must be");
-    assert(block_to   == instructions->at(instructions->length() - 1)->id(), "must be");
-
-    // Update intervals for registers live at the end of this block;
-    BitMap live = block->live_out();
-    int size = (int)live.size();
-    for (int number = (int)live.get_next_one_offset(0, size); number < size; number = (int)live.get_next_one_offset(number + 1, size)) {
-      assert(live.at(number), "should not stop here otherwise");
-      assert(number >= LIR_OprDesc::vreg_base, "fixed intervals must not be live on block bounds");
-      TRACE_LINEAR_SCAN(2, tty->print_cr("live in %d to %d", number, block_to + 2));
-
-      add_use(number, block_from, block_to + 2, noUse, T_ILLEGAL);
-
-      // add special use positions for loop-end blocks when the
-      // interval is used anywhere inside this loop.  It's possible
-      // that the block was part of a non-natural loop, so it might
-      // have an invalid loop index.
-      if (block->is_set(BlockBegin::linear_scan_loop_end_flag) &&
-          block->loop_index() != -1 &&
-          is_interval_in_loop(number, block->loop_index())) {
-        interval_at(number)->add_use_pos(block_to + 1, loopEndMarker);
-      }
-    }
-
-    // iterate all instructions of the block in reverse order.
-    // skip the first instruction because it is always a label
-    // definitions of intervals are processed before uses
-    assert(visitor.no_operands(instructions->at(0)), "first operation must always be a label");
-    for (int j = instructions->length() - 1; j >= 1; j--) {
-      LIR_Op* op = instructions->at(j);
-      int op_id = op->id();
-
-      // visit operation to collect all operands
-      visitor.visit(op);
-
-      // add a temp range for each register if operation destroys caller-save registers
-      if (visitor.has_call()) {
-        for (int k = 0; k < num_caller_save_registers; k++) {
-          add_temp(caller_save_registers[k], op_id, noUse, T_ILLEGAL);
-        }
-        TRACE_LINEAR_SCAN(4, tty->print_cr("operation destroys all caller-save registers"));
-      }
-
-      // Add any platform dependent temps
-      pd_add_temps(op);
-
-      // visit definitions (output and temp operands)
-      int k, n;
-      n = visitor.opr_count(LIR_OpVisitState::outputMode);
-      for (k = 0; k < n; k++) {
-        LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::outputMode, k);
-        assert(opr->is_register(), "visitor should only return register operands");
-        add_def(opr, op_id, use_kind_of_output_operand(op, opr));
-      }
-
-      n = visitor.opr_count(LIR_OpVisitState::tempMode);
-      for (k = 0; k < n; k++) {
-        LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::tempMode, k);
-        assert(opr->is_register(), "visitor should only return register operands");
-        add_temp(opr, op_id, mustHaveRegister);
-      }
-
-      // visit uses (input operands)
-      n = visitor.opr_count(LIR_OpVisitState::inputMode);
-      for (k = 0; k < n; k++) {
-        LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::inputMode, k);
-        assert(opr->is_register(), "visitor should only return register operands");
-        add_use(opr, block_from, op_id, use_kind_of_input_operand(op, opr));
-      }
-
-      // Add uses of live locals from interpreter's point of view for proper
-      // debug information generation
-      // Treat these operands as temp values (if the life range is extended
-      // to a call site, the value would be in a register at the call otherwise)
-      n = visitor.info_count();
-      for (k = 0; k < n; k++) {
-        CodeEmitInfo* info = visitor.info_at(k);
-        ValueStack* stack = info->stack();
-        for_each_state_value(stack, value,
-          add_use(value, block_from, op_id + 1, noUse);
-        );
-      }
-
-      // special steps for some instructions (especially moves)
-      handle_method_arguments(op);
-      handle_doubleword_moves(op);
-      add_register_hints(op);
-
-    } // end of instruction iteration
-  } // end of block iteration
-
-
-  // add the range [0, 1[ to all fixed intervals
-  // -> the register allocator need not handle unhandled fixed intervals
-  for (int n = 0; n < LinearScan::nof_regs; n++) {
-    Interval* interval = interval_at(n);
-    if (interval != NULL) {
-      interval->add_range(0, 1);
-    }
-  }
+	}
+	assert(num_caller_save_registers <= LinearScan::nof_regs, "out of bounds");
+
+
+	LIR_OpVisitState visitor;
+
+	// iterate all blocks in reverse order
+	for (i = block_count() - 1; i >= 0; i--) {
+		BlockBegin* block = block_at(i);
+		LIR_OpList* instructions = block->lir()->instructions_list();
+		int         block_from =   block->first_lir_instruction_id();
+		int         block_to =     block->last_lir_instruction_id();
+
+		assert(block_from == instructions->at(0)->id(), "must be");
+		assert(block_to   == instructions->at(instructions->length() - 1)->id(), "must be");
+
+		// Update intervals for registers live at the end of this block.
+		BitMap live = block->live_out();
+		int size = (int)live.size();
+		for (int number = (int)live.get_next_one_offset(0, size); number < size; number = (int)live.get_next_one_offset(number + 1, size)) {
+			assert(live.at(number), "should not stop here otherwise");
+			assert(number >= LIR_OprDesc::vreg_base, "fixed intervals must not be live on block bounds");
+			TRACE_LINEAR_SCAN(2, tty->print_cr("live in %d to %d", number, block_to + 2));
+
+			add_use(number, block_from, block_to + 2, noUse, T_ILLEGAL);
+
+			// add special use positions for loop-end blocks when the
+			// interval is used anywhere inside this loop.  It's possible
+			// that the block was part of a non-natural loop, so it might
+			// have an invalid loop index.
+			if (block->is_set(BlockBegin::linear_scan_loop_end_flag) &&
+					block->loop_index() != -1 &&
+					is_interval_in_loop(number, block->loop_index())) {
+				interval_at(number)->add_use_pos(block_to + 1, loopEndMarker);
+			}
+		}
+
+		// iterate all instructions of the block in reverse order.
+		// skip the first instruction because it is always a label;
+		// definitions of intervals are processed before uses
+		assert(visitor.no_operands(instructions->at(0)), "first operation must always be a label");
+		for (int j = instructions->length() - 1; j >= 1; j--) {
+			LIR_Op* op = instructions->at(j);
+			int op_id = op->id();
+
+			// visit operation to collect all operands
+			visitor.visit(op);
+
+			// add a temp range for each register if operation destroys caller-save registers
+			if (visitor.has_call()) {
+				for (int k = 0; k < num_caller_save_registers; k++) {
+					add_temp(caller_save_registers[k], op_id, noUse, T_ILLEGAL);
+				}
+				TRACE_LINEAR_SCAN(4, tty->print_cr("operation destroys all caller-save registers"));
+			}
+
+			// Add any platform dependent temps
+			pd_add_temps(op);
+
+			// visit definitions (output and temp operands)
+			int k, n;
+			n = visitor.opr_count(LIR_OpVisitState::outputMode);
+			for (k = 0; k < n; k++) {
+				LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::outputMode, k);
+				assert(opr->is_register(), "visitor should only return register operands");
+				add_def(opr, op_id, use_kind_of_output_operand(op, opr));
+			}
+
+			n = visitor.opr_count(LIR_OpVisitState::tempMode);
+			for (k = 0; k < n; k++) {
+				LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::tempMode, k);
+				assert(opr->is_register(), "visitor should only return register operands");
+				add_temp(opr, op_id, mustHaveRegister);
+			}
+
+			// visit uses (input operands)
+			n = visitor.opr_count(LIR_OpVisitState::inputMode);
+			for (k = 0; k < n; k++) {
+				LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::inputMode, k);
+				assert(opr->is_register(), "visitor should only return register operands");
+				add_use(opr, block_from, op_id, use_kind_of_input_operand(op, opr));
+			}
+
+			// Add uses of live locals from the interpreter's point of view for proper
+			// debug information generation.
+			// Treat these operands as temp values (if the live range is extended
+			// to a call site, the value would be in a register at the call otherwise)
+			n = visitor.info_count();
+			for (k = 0; k < n; k++) {
+				CodeEmitInfo* info = visitor.info_at(k);
+				ValueStack* stack = info->stack();
+				for_each_state_value(stack, value,
+						add_use(value, block_from, op_id + 1, noUse);
+						);
+			}
+
+			// special steps for some instructions (especially moves)
+			handle_method_arguments(op);
+			handle_doubleword_moves(op);
+			add_register_hints(op);
+
+		} // end of instruction iteration
+	} // end of block iteration
+
+
+	// add the range [0, 1[ to all fixed intervals
+	// -> the register allocator need not handle unhandled fixed intervals
+	for (int n = 0; n < LinearScan::nof_regs; n++) {
+		Interval* interval = interval_at(n);
+		if (interval != NULL) {
+			interval->add_range(0, 1);
+		}
+	}
 }
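
To make the reverse walk above concrete, here is a minimal standalone sketch of the idea in plain C++ (the toy types SimpleOp and Range and the example block are assumptions for illustration, not the HotSpot classes): a use conservatively extends a range back to the block entry, and the definition, visited later in the backward walk, shortens it to start at the defining op_id.

#include <cstdio>
#include <map>
#include <vector>

// Toy model of one basic block: each instruction may define one virtual
// register (def == -1 means none) and use several others.
struct SimpleOp { int def; std::vector<int> uses; };

int main() {
  const int block_from = 0;                 // op_id of the block label
  std::vector<SimpleOp> ops = {
    { 10, {} },                             // op_id 2:  v10 = ...
    { 11, {10} },                           // op_id 4:  v11 = f(v10)
    { -1, {10, 11} },                       // op_id 6:  use(v10, v11)
  };

  struct Range { int from, to; };
  std::map<int, Range> intervals;

  // Reverse walk: a use opens a range that conservatively reaches back to the
  // block entry; the definition, seen later in this walk, shortens the range
  // so that it starts at the defining instruction instead.
  for (int j = (int)ops.size() - 1; j >= 0; j--) {
    int op_id = block_from + 2 * (j + 1);
    for (int v : ops[j].uses) {
      if (!intervals.count(v)) intervals[v] = { block_from, op_id };
    }
    int d = ops[j].def;
    if (d != -1 && intervals.count(d)) intervals[d].from = op_id;
  }

  for (const auto& e : intervals)
    std::printf("v%d lives [%d, %d]\n", e.first, e.second.from, e.second.to);
  return 0;                                 // prints v10 [2, 6] and v11 [4, 6]
}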
 
 
 // ********** Phase 5: actual register allocation
 
 int LinearScan::interval_cmp(Interval** a, Interval** b) {
-  if (*a != NULL) {
-    if (*b != NULL) {
-      return (*a)->from() - (*b)->from();
-    } else {
-      return -1;
-    }
-  } else {
-    if (*b != NULL) {
-      return 1;
-    } else {
-      return 0;
-    }
-  }
+	if (*a != NULL) {
+		if (*b != NULL) {
+			return (*a)->from() - (*b)->from();
+		} else {
+			return -1;
+		}
+	} else {
+		if (*b != NULL) {
+			return 1;
+		} else {
+			return 0;
+		}
+	}
 }
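
interval_cmp orders intervals by their start position and pushes NULL slots to the end. A small sketch of the same ordering with std::sort, assuming a stand-in Iv struct rather than the Interval class:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Iv { int from; };                  // stand-in for Interval, illustration only

// Same ordering as interval_cmp: non-NULL entries ascending by from(), NULL last.
static bool before(const Iv* a, const Iv* b) {
  if (a == nullptr) return false;         // a NULL entry never comes first
  if (b == nullptr) return true;
  return a->from < b->from;
}

int main() {
  Iv x{8}, y{2}, z{5};
  std::vector<const Iv*> v = { &x, nullptr, &y, &z };
  std::sort(v.begin(), v.end(), before);
  for (const Iv* p : v) {
    if (p) std::printf("%d ", p->from); else std::printf("NULL ");
  }
  std::printf("\n");                      // prints: 2 5 8 NULL
  return 0;
}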
 
 #ifndef PRODUCT
 bool LinearScan::is_sorted(IntervalArray* intervals) {
-  int from = -1;
-  int i, j;
-  for (i = 0; i < intervals->length(); i ++) {
-    Interval* it = intervals->at(i);
-    if (it != NULL) {
-      if (from > it->from()) {
-        assert(false, "");
-        return false;
-      }
-      from = it->from();
-    }
-  }
-
-  // check in both directions if sorted list and unsorted list contain same intervals
-  for (i = 0; i < interval_count(); i++) {
-    if (interval_at(i) != NULL) {
-      int num_found = 0;
-      for (j = 0; j < intervals->length(); j++) {
-        if (interval_at(i) == intervals->at(j)) {
-          num_found++;
-        }
-      }
-      assert(num_found == 1, "lists do not contain same intervals");
-    }
-  }
-  for (j = 0; j < intervals->length(); j++) {
-    int num_found = 0;
-    for (i = 0; i < interval_count(); i++) {
-      if (interval_at(i) == intervals->at(j)) {
-        num_found++;
-      }
-    }
-    assert(num_found == 1, "lists do not contain same intervals");
-  }
-
-  return true;
+	int from = -1;
+	int i, j;
+	for (i = 0; i < intervals->length(); i ++) {
+		Interval* it = intervals->at(i);
+		if (it != NULL) {
+			if (from > it->from()) {
+				assert(false, "");
+				return false;
+			}
+			from = it->from();
+		}
+	}
+
+	// check in both directions whether the sorted and unsorted lists contain the same intervals
+	for (i = 0; i < interval_count(); i++) {
+		if (interval_at(i) != NULL) {
+			int num_found = 0;
+			for (j = 0; j < intervals->length(); j++) {
+				if (interval_at(i) == intervals->at(j)) {
+					num_found++;
+				}
+			}
+			assert(num_found == 1, "lists do not contain same intervals");
+		}
+	}
+	for (j = 0; j < intervals->length(); j++) {
+		int num_found = 0;
+		for (i = 0; i < interval_count(); i++) {
+			if (interval_at(i) == intervals->at(j)) {
+				num_found++;
+			}
+		}
+		assert(num_found == 1, "lists do not contain same intervals");
+	}
+
+	return true;
 }
 #endif
 
 void LinearScan::add_to_list(Interval** first, Interval** prev, Interval* interval) {
-  if (*prev != NULL) {
-    (*prev)->set_next(interval);
-  } else {
-    *first = interval;
-  }
-  *prev = interval;
+	if (*prev != NULL) {
+		(*prev)->set_next(interval);
+	} else {
+		*first = interval;
+	}
+	*prev = interval;
 }
 
 void LinearScan::create_unhandled_lists(Interval** list1, Interval** list2, bool (is_list1)(const Interval* i), bool (is_list2)(const Interval* i)) {
-  assert(is_sorted(_sorted_intervals), "interval list is not sorted");
-
-  *list1 = *list2 = Interval::end();
-
-  Interval* list1_prev = NULL;
-  Interval* list2_prev = NULL;
-  Interval* v;
-
-  const int n = _sorted_intervals->length();
-  for (int i = 0; i < n; i++) {
-    v = _sorted_intervals->at(i);
-    if (v == NULL) continue;
-
-    if (is_list1(v)) {
-      add_to_list(list1, &list1_prev, v);
-    } else if (is_list2 == NULL || is_list2(v)) {
-      add_to_list(list2, &list2_prev, v);
-    }
-  }
-
-  if (list1_prev != NULL) list1_prev->set_next(Interval::end());
-  if (list2_prev != NULL) list2_prev->set_next(Interval::end());
-
-  assert(list1_prev == NULL || list1_prev->next() == Interval::end(), "linear list ends not with sentinel");
-  assert(list2_prev == NULL || list2_prev->next() == Interval::end(), "linear list ends not with sentinel");
+	assert(is_sorted(_sorted_intervals), "interval list is not sorted");
+
+	*list1 = *list2 = Interval::end();
+
+	Interval* list1_prev = NULL;
+	Interval* list2_prev = NULL;
+	Interval* v;
+
+	const int n = _sorted_intervals->length();
+	for (int i = 0; i < n; i++) {
+		v = _sorted_intervals->at(i);
+		if (v == NULL) continue;
+
+		if (is_list1(v)) {
+			add_to_list(list1, &list1_prev, v);
+		} else if (is_list2 == NULL || is_list2(v)) {
+			add_to_list(list2, &list2_prev, v);
+		}
+	}
+
+	if (list1_prev != NULL) list1_prev->set_next(Interval::end());
+	if (list2_prev != NULL) list2_prev->set_next(Interval::end());
+
+	assert(list1_prev == NULL || list1_prev->next() == Interval::end(), "linear list ends not with sentinel");
+	assert(list2_prev == NULL || list2_prev->next() == Interval::end(), "linear list ends not with sentinel");
 }
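
create_unhandled_lists splits one sorted array into two singly linked lists, both terminated by the Interval::end() sentinel, using the (first, prev) pattern of add_to_list. A self-contained sketch of that pattern, assuming a toy Node type in place of Interval:

#include <cstdio>
#include <vector>

// Toy stand-ins; not the HotSpot classes.
struct Node { int value; Node* next; };
static Node sentinel = { -1, &sentinel };        // plays the role of Interval::end()

// Append to a singly linked list built front-to-back through a (first, prev) pair.
static void add_to_list(Node** first, Node** prev, Node* n) {
  if (*prev != nullptr) (*prev)->next = n; else *first = n;
  *prev = n;
}

int main() {
  // The input is already sorted; split it into two lists by a predicate while
  // preserving the order inside each list.
  std::vector<Node> storage = { {1, nullptr}, {2, nullptr}, {3, nullptr}, {4, nullptr} };
  Node *evens = &sentinel, *odds = &sentinel;
  Node *evens_prev = nullptr, *odds_prev = nullptr;

  for (Node& n : storage) {
    if (n.value % 2 == 0) add_to_list(&evens, &evens_prev, &n);
    else                  add_to_list(&odds,  &odds_prev,  &n);
  }
  if (evens_prev != nullptr) evens_prev->next = &sentinel;   // terminate with the sentinel
  if (odds_prev  != nullptr) odds_prev->next  = &sentinel;

  for (Node* p = odds; p != &sentinel; p = p->next) std::printf("%d ", p->value);
  std::printf("\n");                                         // prints: 1 3
  return 0;
}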
 
 
 void LinearScan::sort_intervals_before_allocation() {
-  TIME_LINEAR_SCAN(timer_sort_intervals_before);
-
-  IntervalList* unsorted_list = &_intervals;
-  int unsorted_len = unsorted_list->length();
-  int sorted_len = 0;
-  int unsorted_idx;
-  int sorted_idx = 0;
-  int sorted_from_max = -1;
-
-  // calc number of items for sorted list (sorted list must not contain NULL values)
-  for (unsorted_idx = 0; unsorted_idx < unsorted_len; unsorted_idx++) {
-    if (unsorted_list->at(unsorted_idx) != NULL) {
-      sorted_len++;
-    }
-  }
-  IntervalArray* sorted_list = new IntervalArray(sorted_len);
-
-  // special sorting algorithm: the original interval-list is almost sorted,
-  // only some intervals are swapped. So this is much faster than a complete QuickSort
-  for (unsorted_idx = 0; unsorted_idx < unsorted_len; unsorted_idx++) {
-    Interval* cur_interval = unsorted_list->at(unsorted_idx);
-
-    if (cur_interval != NULL) {
-      int cur_from = cur_interval->from();
-
-      if (sorted_from_max <= cur_from) {
-        sorted_list->at_put(sorted_idx++, cur_interval);
-        sorted_from_max = cur_interval->from();
-      } else {
-        // the asumption that the intervals are already sorted failed,
-        // so this interval must be sorted in manually
-        int j;
-        for (j = sorted_idx - 1; j >= 0 && cur_from < sorted_list->at(j)->from(); j--) {
-          sorted_list->at_put(j + 1, sorted_list->at(j));
-        }
-        sorted_list->at_put(j + 1, cur_interval);
-        sorted_idx++;
-      }
-    }
-  }
-  _sorted_intervals = sorted_list;
+	TIME_LINEAR_SCAN(timer_sort_intervals_before);
+
+	IntervalList* unsorted_list = &_intervals;
+	int unsorted_len = unsorted_list->length();
+	int sorted_len = 0;
+	int unsorted_idx;
+	int sorted_idx = 0;
+	int sorted_from_max = -1;
+
+	// calc number of items for sorted list (sorted list must not contain NULL values)
+	for (unsorted_idx = 0; unsorted_idx < unsorted_len; unsorted_idx++) {
+		if (unsorted_list->at(unsorted_idx) != NULL) {
+			sorted_len++;
+		}
+	}
+	IntervalArray* sorted_list = new IntervalArray(sorted_len);
+
+	// special sorting algorithm: the original interval-list is almost sorted,
+	// only some intervals are swapped. So this is much faster than a complete QuickSort
+	for (unsorted_idx = 0; unsorted_idx < unsorted_len; unsorted_idx++) {
+		Interval* cur_interval = unsorted_list->at(unsorted_idx);
+
+		if (cur_interval != NULL) {
+			int cur_from = cur_interval->from();
+
+			if (sorted_from_max <= cur_from) {
+				sorted_list->at_put(sorted_idx++, cur_interval);
+				sorted_from_max = cur_interval->from();
+			} else {
+				// the assumption that the intervals are already sorted failed,
+				// so this interval must be inserted manually at the right position
+				int j;
+				for (j = sorted_idx - 1; j >= 0 && cur_from < sorted_list->at(j)->from(); j--) {
+					sorted_list->at_put(j + 1, sorted_list->at(j));
+				}
+				sorted_list->at_put(j + 1, cur_interval);
+				sorted_idx++;
+			}
+		}
+	}
+	_sorted_intervals = sorted_list;
 }
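
Because the interval list is produced almost in order, the loop above only falls back to shifting elements when an out-of-place entry appears. A simplified sketch of the same almost-sorted insertion strategy, on plain ints with assumed example data:

#include <cstdio>
#include <vector>

int main() {
  // Almost-sorted input: only a few elements are out of place, so an
  // insertion-style pass is cheaper than a full sort.
  std::vector<int> in = { 1, 3, 2, 4, 7, 6, 9 };
  std::vector<int> sorted;
  int sorted_max = -1;

  for (int cur : in) {
    if (sorted_max <= cur) {
      sorted.push_back(cur);              // common case: already in order
      sorted_max = cur;
    } else {
      sorted.push_back(cur);              // grow by one slot first
      int j;
      // shift larger elements one slot to the right, then drop cur in place
      for (j = (int)sorted.size() - 2; j >= 0 && cur < sorted[j]; j--)
        sorted[j + 1] = sorted[j];
      sorted[j + 1] = cur;
    }
  }
  for (int x : sorted) std::printf("%d ", x);    // prints: 1 2 3 4 6 7 9
  std::printf("\n");
  return 0;
}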
 
 void LinearScan::sort_intervals_after_allocation() {
-  TIME_LINEAR_SCAN(timer_sort_intervals_after);
-
-  IntervalArray* old_list      = _sorted_intervals;
-  IntervalList*  new_list      = _new_intervals_from_allocation;
-  int old_len = old_list->length();
-  int new_len = new_list->length();
-
-  if (new_len == 0) {
-    // no intervals have been added during allocation, so sorted list is already up to date
-    return;
-  }
-
-  // conventional sort-algorithm for new intervals
-  new_list->sort(interval_cmp);
-
-  // merge old and new list (both already sorted) into one combined list
-  IntervalArray* combined_list = new IntervalArray(old_len + new_len);
-  int old_idx = 0;
-  int new_idx = 0;
-
-  while (old_idx + new_idx < old_len + new_len) {
-    if (new_idx >= new_len || (old_idx < old_len && old_list->at(old_idx)->from() <= new_list->at(new_idx)->from())) {
-      combined_list->at_put(old_idx + new_idx, old_list->at(old_idx));
-      old_idx++;
-    } else {
-      combined_list->at_put(old_idx + new_idx, new_list->at(new_idx));
-      new_idx++;
-    }
-  }
-
-  _sorted_intervals = combined_list;
+	TIME_LINEAR_SCAN(timer_sort_intervals_after);
+
+	IntervalArray* old_list      = _sorted_intervals;
+	IntervalList*  new_list      = _new_intervals_from_allocation;
+	int old_len = old_list->length();
+	int new_len = new_list->length();
+
+	if (new_len == 0) {
+		// no intervals have been added during allocation, so sorted list is already up to date
+		return;
+	}
+
+	// conventional sort-algorithm for new intervals
+	new_list->sort(interval_cmp);
+
+	// merge old and new list (both already sorted) into one combined list
+	IntervalArray* combined_list = new IntervalArray(old_len + new_len);
+	int old_idx = 0;
+	int new_idx = 0;
+
+	while (old_idx + new_idx < old_len + new_len) {
+		if (new_idx >= new_len || (old_idx < old_len && old_list->at(old_idx)->from() <= new_list->at(new_idx)->from())) {
+			combined_list->at_put(old_idx + new_idx, old_list->at(old_idx));
+			old_idx++;
+		} else {
+			combined_list->at_put(old_idx + new_idx, new_list->at(new_idx));
+			new_idx++;
+		}
+	}
+
+	_sorted_intervals = combined_list;
 }
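
The merge above relies on both inputs already being sorted and interleaves them by their from() positions. The same merge, sketched on plain ints under that assumption:

#include <cstdio>
#include <vector>

int main() {
  // Two already-sorted sequences (the old sorted intervals and the intervals
  // created during allocation); merge them into one sorted sequence.
  std::vector<int> old_list = { 1, 4, 6, 9 };
  std::vector<int> new_list = { 2, 3, 8 };
  std::vector<int> combined(old_list.size() + new_list.size());

  size_t old_idx = 0, new_idx = 0;
  while (old_idx + new_idx < combined.size()) {
    // take from the old list while it is not exhausted and its head is <= the new head
    if (new_idx >= new_list.size() ||
        (old_idx < old_list.size() && old_list[old_idx] <= new_list[new_idx])) {
      combined[old_idx + new_idx] = old_list[old_idx];
      old_idx++;
    } else {
      combined[old_idx + new_idx] = new_list[new_idx];
      new_idx++;
    }
  }
  for (int x : combined) std::printf("%d ", x);   // prints: 1 2 3 4 6 8 9
  std::printf("\n");
  return 0;
}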
 
 
 void LinearScan::allocate_registers() {
-  TIME_LINEAR_SCAN(timer_allocate_registers);
-
-  Interval* precolored_cpu_intervals, *not_precolored_cpu_intervals;
-  Interval* precolored_fpu_intervals, *not_precolored_fpu_intervals;
-
-  create_unhandled_lists(&precolored_cpu_intervals, &not_precolored_cpu_intervals, is_precolored_cpu_interval, is_virtual_cpu_interval);
-  if (has_fpu_registers()) {
-    create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
+	TIME_LINEAR_SCAN(timer_allocate_registers);
+
+	Interval* precolored_cpu_intervals, *not_precolored_cpu_intervals;
+	Interval* precolored_fpu_intervals, *not_precolored_fpu_intervals;
+
+	create_unhandled_lists(&precolored_cpu_intervals, &not_precolored_cpu_intervals, is_precolored_cpu_interval, is_virtual_cpu_interval);
+	if (has_fpu_registers()) {
+		create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
 #ifdef ASSERT
-  } else {
-    // fpu register allocation is omitted because no virtual fpu registers are present
-    // just check this again...
-    create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
-    assert(not_precolored_fpu_intervals == Interval::end(), "missed an uncolored fpu interval");
+	} else {
+		// fpu register allocation is omitted because no virtual fpu registers are present
+		// just check this again...
+		create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
+		assert(not_precolored_fpu_intervals == Interval::end(), "missed an uncolored fpu interval");
 #endif
-  }
-
-  // allocate cpu registers
-  LinearScanWalker cpu_lsw(this, precolored_cpu_intervals, not_precolored_cpu_intervals);
-  cpu_lsw.walk();
-  cpu_lsw.finish_allocation();
-
-  if (has_fpu_registers()) {
-    // allocate fpu registers
-    LinearScanWalker fpu_lsw(this, precolored_fpu_intervals, not_precolored_fpu_intervals);
-    fpu_lsw.walk();
-    fpu_lsw.finish_allocation();
-  }
+	}
+
+	// allocate cpu registers
+	LinearScanWalker cpu_lsw(this, precolored_cpu_intervals, not_precolored_cpu_intervals);
+	cpu_lsw.walk();
+	cpu_lsw.finish_allocation();
+
+	if (has_fpu_registers()) {
+		// allocate fpu registers
+		LinearScanWalker fpu_lsw(this, precolored_fpu_intervals, not_precolored_fpu_intervals);
+		fpu_lsw.walk();
+		fpu_lsw.finish_allocation();
+	}
 }
 
 
@@ -1613,356 +1614,356 @@
 // wrapper for Interval::split_child_at_op_id that performs a bailout in product mode
 // instead of returning NULL
 Interval* LinearScan::split_child_at_op_id(Interval* interval, int op_id, LIR_OpVisitState::OprMode mode) {
-  Interval* result = interval->split_child_at_op_id(op_id, mode);
-  if (result != NULL) {
-    return result;
-  }
-
-  assert(false, "must find an interval, but do a clean bailout in product mode");
-  result = new Interval(LIR_OprDesc::vreg_base);
-  result->assign_reg(0);
-  result->set_type(T_INT);
-  BAILOUT_("LinearScan: interval is NULL", result);
+	Interval* result = interval->split_child_at_op_id(op_id, mode);
+	if (result != NULL) {
+		return result;
+	}
+
+	assert(false, "must find an interval, but do a clean bailout in product mode");
+	result = new Interval(LIR_OprDesc::vreg_base);
+	result->assign_reg(0);
+	result->set_type(T_INT);
+	BAILOUT_("LinearScan: interval is NULL", result);
 }
 
 
 Interval* LinearScan::interval_at_block_begin(BlockBegin* block, int reg_num) {
-  assert(LinearScan::nof_regs <= reg_num && reg_num < num_virtual_regs(), "register number out of bounds");
-  assert(interval_at(reg_num) != NULL, "no interval found");
-
-  return split_child_at_op_id(interval_at(reg_num), block->first_lir_instruction_id(), LIR_OpVisitState::outputMode);
+	assert(LinearScan::nof_regs <= reg_num && reg_num < num_virtual_regs(), "register number out of bounds");
+	assert(interval_at(reg_num) != NULL, "no interval found");
+
+	return split_child_at_op_id(interval_at(reg_num), block->first_lir_instruction_id(), LIR_OpVisitState::outputMode);
 }
 
 Interval* LinearScan::interval_at_block_end(BlockBegin* block, int reg_num) {
-  assert(LinearScan::nof_regs <= reg_num && reg_num < num_virtual_regs(), "register number out of bounds");
-  assert(interval_at(reg_num) != NULL, "no interval found");
-
-  return split_child_at_op_id(interval_at(reg_num), block->last_lir_instruction_id() + 1, LIR_OpVisitState::outputMode);
+	assert(LinearScan::nof_regs <= reg_num && reg_num < num_virtual_regs(), "register number out of bounds");
+	assert(interval_at(reg_num) != NULL, "no interval found");
+
+	return split_child_at_op_id(interval_at(reg_num), block->last_lir_instruction_id() + 1, LIR_OpVisitState::outputMode);
 }
 
 Interval* LinearScan::interval_at_op_id(int reg_num, int op_id) {
-  assert(LinearScan::nof_regs <= reg_num && reg_num < num_virtual_regs(), "register number out of bounds");
-  assert(interval_at(reg_num) != NULL, "no interval found");
-
-  return split_child_at_op_id(interval_at(reg_num), op_id, LIR_OpVisitState::inputMode);
+	assert(LinearScan::nof_regs <= reg_num && reg_num < num_virtual_regs(), "register number out of bounds");
+	assert(interval_at(reg_num) != NULL, "no interval found");
+
+	return split_child_at_op_id(interval_at(reg_num), op_id, LIR_OpVisitState::inputMode);
 }
 
 
 void LinearScan::resolve_collect_mappings(BlockBegin* from_block, BlockBegin* to_block, MoveResolver &move_resolver) {
-  DEBUG_ONLY(move_resolver.check_empty());
-
-  const int num_regs = num_virtual_regs();
-  const int size = live_set_size();
-  const BitMap live_at_edge = to_block->live_in();
-
-  // visit all registers where the live_at_edge bit is set
-  for (int r = (int)live_at_edge.get_next_one_offset(0, size); r < size; r = (int)live_at_edge.get_next_one_offset(r + 1, size)) {
-    assert(r < num_regs, "live information set for not exisiting interval");
-    assert(from_block->live_out().at(r) && to_block->live_in().at(r), "interval not live at this edge");
-
-    Interval* from_interval = interval_at_block_end(from_block, r);
-    Interval* to_interval = interval_at_block_begin(to_block, r);
-
-    if (from_interval != to_interval && (from_interval->assigned_reg() != to_interval->assigned_reg() || from_interval->assigned_regHi() != to_interval->assigned_regHi())) {
-      // need to insert move instruction
-      move_resolver.add_mapping(from_interval, to_interval);
-    }
-  }
+	DEBUG_ONLY(move_resolver.check_empty());
+
+	const int num_regs = num_virtual_regs();
+	const int size = live_set_size();
+	const BitMap live_at_edge = to_block->live_in();
+
+	// visit all registers where the live_at_edge bit is set
+	for (int r = (int)live_at_edge.get_next_one_offset(0, size); r < size; r = (int)live_at_edge.get_next_one_offset(r + 1, size)) {
+		assert(r < num_regs, "live information set for non-existing interval");
+		assert(from_block->live_out().at(r) && to_block->live_in().at(r), "interval not live at this edge");
+
+		Interval* from_interval = interval_at_block_end(from_block, r);
+		Interval* to_interval = interval_at_block_begin(to_block, r);
+
+		if (from_interval != to_interval && (from_interval->assigned_reg() != to_interval->assigned_reg() || from_interval->assigned_regHi() != to_interval->assigned_regHi())) {
+			// need to insert move instruction
+			move_resolver.add_mapping(from_interval, to_interval);
+		}
+	}
 }
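
resolve_collect_mappings walks only the registers whose live bit is set on the edge and records a move whenever the assigned location differs between the end of the predecessor and the start of the successor. A toy sketch of that check (the bitset and the two location maps are illustrative assumptions, not the allocator's data structures):

#include <bitset>
#include <cstdio>
#include <map>

int main() {
  // Assumed toy setup: for each virtual register, where its value lives at the
  // end of the predecessor block vs. the beginning of the successor block.
  std::bitset<8> live_at_edge;              // virtual registers live across the edge
  live_at_edge.set(3); live_at_edge.set(5);

  std::map<int, int> loc_at_from_end = { {3, /*reg*/ 1}, {5, /*stack slot*/ 40} };
  std::map<int, int> loc_at_to_begin = { {3, /*reg*/ 1}, {5, /*reg*/ 2} };

  // Visit only the registers whose live bit is set; a move is needed whenever
  // the two locations disagree.
  for (int r = 0; r < (int)live_at_edge.size(); r++) {
    if (!live_at_edge.test(r)) continue;
    if (loc_at_from_end[r] != loc_at_to_begin[r])
      std::printf("edge move needed for v%d: %d -> %d\n",
                  r, loc_at_from_end[r], loc_at_to_begin[r]);
  }
  return 0;                                 // reports a move only for v5
}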
 
 
 void LinearScan::resolve_find_insert_pos(BlockBegin* from_block, BlockBegin* to_block, MoveResolver &move_resolver) {
-  if (from_block->number_of_sux() <= 1) {
-    TRACE_LINEAR_SCAN(4, tty->print_cr("inserting moves at end of from_block B%d", from_block->block_id()));
-
-    LIR_OpList* instructions = from_block->lir()->instructions_list();
-    LIR_OpBranch* branch = instructions->last()->as_OpBranch();
-    if (branch != NULL) {
-      // insert moves before branch
-      assert(branch->cond() == lir_cond_always, "block does not end with an unconditional jump");
-      move_resolver.set_insert_position(from_block->lir(), instructions->length() - 2);
-    } else {
-      move_resolver.set_insert_position(from_block->lir(), instructions->length() - 1);
-    }
-
-  } else {
-    TRACE_LINEAR_SCAN(4, tty->print_cr("inserting moves at beginning of to_block B%d", to_block->block_id()));
+	if (from_block->number_of_sux() <= 1) {
+		TRACE_LINEAR_SCAN(4, tty->print_cr("inserting moves at end of from_block B%d", from_block->block_id()));
+
+		LIR_OpList* instructions = from_block->lir()->instructions_list();
+		LIR_OpBranch* branch = instructions->last()->as_OpBranch();
+		if (branch != NULL) {
+			// insert moves before branch
+			assert(branch->cond() == lir_cond_always, "block does not end with an unconditional jump");
+			move_resolver.set_insert_position(from_block->lir(), instructions->length() - 2);
+		} else {
+			move_resolver.set_insert_position(from_block->lir(), instructions->length() - 1);
+		}
+
+	} else {
+		TRACE_LINEAR_SCAN(4, tty->print_cr("inserting moves at beginning of to_block B%d", to_block->block_id()));
 #ifdef ASSERT
-    assert(from_block->lir()->instructions_list()->at(0)->as_OpLabel() != NULL, "block does not start with a label");
-
-    // because the number of predecessor edges matches the number of
-    // successor edges, blocks which are reached by switch statements
-    // may have be more than one predecessor but it will be guaranteed
-    // that all predecessors will be the same.
-    for (int i = 0; i < to_block->number_of_preds(); i++) {
-      assert(from_block == to_block->pred_at(i), "all critical edges must be broken");
-    }
+		assert(from_block->lir()->instructions_list()->at(0)->as_OpLabel() != NULL, "block does not start with a label");
+
+		// because the number of predecessor edges matches the number of
+		// successor edges, blocks which are reached by switch statements
+		// may have more than one predecessor, but it is guaranteed
+		// that all predecessors are the same.
+		for (int i = 0; i < to_block->number_of_preds(); i++) {
+			assert(from_block == to_block->pred_at(i), "all critical edges must be broken");
+		}
 #endif
 
-    move_resolver.set_insert_position(to_block->lir(), 0);
-  }
+		move_resolver.set_insert_position(to_block->lir(), 0);
+	}
 }
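
The insert-position rule above can be summarized as: with at most one successor, append at the end of the predecessor (in front of its unconditional branch, if any); otherwise, since critical edges are already split, insert right after the successor's label. A small sketch of that rule, using a hypothetical ToyBlock type rather than BlockBegin:

#include <cstdio>
#include <string>
#include <vector>

// Toy block for illustration only: a name, an instruction list, and a
// successor count (not the HotSpot BlockBegin API).
struct ToyBlock { std::string name; std::vector<std::string> code; int num_sux; };

// Returns a printable description of where resolving moves would be inserted.
static std::string insert_pos(const ToyBlock& from, const ToyBlock& to) {
  if (from.num_sux <= 1) {
    // Safe to append at the end of the predecessor; if it ends with an
    // unconditional branch, the moves go in front of that branch.
    bool ends_with_branch = !from.code.empty() && from.code.back() == "branch";
    int idx = (int)from.code.size() - (ends_with_branch ? 2 : 1);
    return from.name + " at index " + std::to_string(idx);
  }
  // Critical edges are split, so inserting at the start of the successor
  // (right after its label) is correct.
  return to.name + " at index 0";
}

int main() {
  ToyBlock b1 { "B1", { "label", "add", "branch" }, 1 };
  ToyBlock b2 { "B2", { "label", "mul" }, 0 };
  std::printf("insert moves in %s\n", insert_pos(b1, b2).c_str());  // B1 at index 1
  return 0;
}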
 
 
 // insert necessary moves (spilling or reloading) at edges between blocks if interval has been split
 void LinearScan::resolve_data_flow() {
-  TIME_LINEAR_SCAN(timer_resolve_data_flow);
-
-  int num_blocks = block_count();
-  MoveResolver move_resolver(this);
-  BitMap block_completed(num_blocks);  block_completed.clear();
-  BitMap already_resolved(num_blocks); already_resolved.clear();
-
-  int i;
-  for (i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-
-    // check if block has only one predecessor and only one successor
-    if (block->number_of_preds() == 1 && block->number_of_sux() == 1 && block->number_of_exception_handlers() == 0) {
-      LIR_OpList* instructions = block->lir()->instructions_list();
-      assert(instructions->at(0)->code() == lir_label, "block must start with label");
-      assert(instructions->last()->code() == lir_branch, "block with successors must end with branch");
-      assert(instructions->last()->as_OpBranch()->cond() == lir_cond_always, "block with successor must end with unconditional branch");
-
-      // check if block is empty (only label and branch)
-      if (instructions->length() == 2) {
-        BlockBegin* pred = block->pred_at(0);
-        BlockBegin* sux = block->sux_at(0);
-
-        // prevent optimization of two consecutive blocks
-        if (!block_completed.at(pred->linear_scan_number()) && !block_completed.at(sux->linear_scan_number())) {
-          TRACE_LINEAR_SCAN(3, tty->print_cr("**** optimizing empty block B%d (pred: B%d, sux: B%d)", block->block_id(), pred->block_id(), sux->block_id()));
-          block_completed.set_bit(block->linear_scan_number());
-
-          // directly resolve between pred and sux (without looking at the empty block between)
-          resolve_collect_mappings(pred, sux, move_resolver);
-          if (move_resolver.has_mappings()) {
-            move_resolver.set_insert_position(block->lir(), 0);
-            move_resolver.resolve_and_append_moves();
-          }
-        }
-      }
-    }
-  }
-
-
-  for (i = 0; i < num_blocks; i++) {
-    if (!block_completed.at(i)) {
-      BlockBegin* from_block = block_at(i);
-      already_resolved.set_from(block_completed);
-
-      int num_sux = from_block->number_of_sux();
-      for (int s = 0; s < num_sux; s++) {
-        BlockBegin* to_block = from_block->sux_at(s);
-
-        // check for duplicate edges between the same blocks (can happen with switch blocks)
-        if (!already_resolved.at(to_block->linear_scan_number())) {
-          TRACE_LINEAR_SCAN(3, tty->print_cr("**** processing edge between B%d and B%d", from_block->block_id(), to_block->block_id()));
-          already_resolved.set_bit(to_block->linear_scan_number());
-
-          // collect all intervals that have been split between from_block and to_block
-          resolve_collect_mappings(from_block, to_block, move_resolver);
-          if (move_resolver.has_mappings()) {
-            resolve_find_insert_pos(from_block, to_block, move_resolver);
-            move_resolver.resolve_and_append_moves();
-          }
-        }
-      }
-    }
-  }
+	TIME_LINEAR_SCAN(timer_resolve_data_flow);
+
+	int num_blocks = block_count();
+	MoveResolver move_resolver(this);
+	BitMap block_completed(num_blocks);  block_completed.clear();
+	BitMap already_resolved(num_blocks); already_resolved.clear();
+
+	int i;
+	for (i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+
+		// check if block has only one predecessor and only one successor
+		if (block->number_of_preds() == 1 && block->number_of_sux() == 1 && block->number_of_exception_handlers() == 0) {
+			LIR_OpList* instructions = block->lir()->instructions_list();
+			assert(instructions->at(0)->code() == lir_label, "block must start with label");
+			assert(instructions->last()->code() == lir_branch, "block with successors must end with branch");
+			assert(instructions->last()->as_OpBranch()->cond() == lir_cond_always, "block with successor must end with unconditional branch");
+
+			// check if block is empty (only label and branch)
+			if (instructions->length() == 2) {
+				BlockBegin* pred = block->pred_at(0);
+				BlockBegin* sux = block->sux_at(0);
+
+				// prevent optimization of two consecutive blocks
+				if (!block_completed.at(pred->linear_scan_number()) && !block_completed.at(sux->linear_scan_number())) {
+					TRACE_LINEAR_SCAN(3, tty->print_cr("**** optimizing empty block B%d (pred: B%d, sux: B%d)", block->block_id(), pred->block_id(), sux->block_id()));
+					block_completed.set_bit(block->linear_scan_number());
+
+					// directly resolve between pred and sux (without looking at the empty block between)
+					resolve_collect_mappings(pred, sux, move_resolver);
+					if (move_resolver.has_mappings()) {
+						move_resolver.set_insert_position(block->lir(), 0);
+						move_resolver.resolve_and_append_moves();
+					}
+				}
+			}
+		}
+	}
+
+
+	for (i = 0; i < num_blocks; i++) {
+		if (!block_completed.at(i)) {
+			BlockBegin* from_block = block_at(i);
+			already_resolved.set_from(block_completed);
+
+			int num_sux = from_block->number_of_sux();
+			for (int s = 0; s < num_sux; s++) {
+				BlockBegin* to_block = from_block->sux_at(s);
+
+				// check for duplicate edges between the same blocks (can happen with switch blocks)
+				if (!already_resolved.at(to_block->linear_scan_number())) {
+					TRACE_LINEAR_SCAN(3, tty->print_cr("**** processing edge between B%d and B%d", from_block->block_id(), to_block->block_id()));
+					already_resolved.set_bit(to_block->linear_scan_number());
+
+					// collect all intervals that have been split between from_block and to_block
+					resolve_collect_mappings(from_block, to_block, move_resolver);
+					if (move_resolver.has_mappings()) {
+						resolve_find_insert_pos(from_block, to_block, move_resolver);
+						move_resolver.resolve_and_append_moves();
+					}
+				}
+			}
+		}
+	}
 }
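
The second loop above guards against duplicate edges (a switch can branch to the same successor several times) by marking each resolved successor. A minimal, simplified sketch of that duplicate-edge suppression, using a std::set in place of the already_resolved BitMap and without the block_completed seeding:

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // A switch can produce several identical (from, to) edges; resolve each
  // distinct successor only once by remembering which ones were handled.
  int from_block = 4;
  std::vector<int> successors = { 7, 9, 7, 7, 12 };   // duplicates from a switch

  std::set<int> already_resolved;
  for (int to_block : successors) {
    if (!already_resolved.insert(to_block).second) continue;  // duplicate edge, skip
    std::printf("processing edge B%d -> B%d\n", from_block, to_block);
  }
  return 0;                                            // processes 7, 9 and 12 once each
}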
 
 
 void LinearScan::resolve_exception_entry(BlockBegin* block, int reg_num, MoveResolver &move_resolver) {
-  if (interval_at(reg_num) == NULL) {
-    // if a phi function is never used, no interval is created -> ignore this
-    return;
-  }
-
-  Interval* interval = interval_at_block_begin(block, reg_num);
-  int reg = interval->assigned_reg();
-  int regHi = interval->assigned_regHi();
-
-  if ((reg < nof_regs && interval->always_in_memory()) ||
-      (use_fpu_stack_allocation() && reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg)) {
-    // the interval is split to get a short range that is located on the stack
-    // in the following two cases:
-    // * the interval started in memory (e.g. method parameter), but is currently in a register
-    //   this is an optimization for exception handling that reduces the number of moves that
-    //   are necessary for resolving the states when an exception uses this exception handler
-    // * the interval would be on the fpu stack at the begin of the exception handler
-    //   this is not allowed because of the complicated fpu stack handling on Intel
-
-    // range that will be spilled to memory
-    int from_op_id = block->first_lir_instruction_id();
-    int to_op_id = from_op_id + 1;  // short live range of length 1
-    assert(interval->from() <= from_op_id && interval->to() >= to_op_id,
-           "no split allowed between exception entry and first instruction");
-
-    if (interval->from() != from_op_id) {
-      // the part before from_op_id is unchanged
-      interval = interval->split(from_op_id);
-      interval->assign_reg(reg, regHi);
-      append_interval(interval);
-    }
-    assert(interval->from() == from_op_id, "must be true now");
-
-    Interval* spilled_part = interval;
-    if (interval->to() != to_op_id) {
-      // the part after to_op_id is unchanged
-      spilled_part = interval->split_from_start(to_op_id);
-      append_interval(spilled_part);
-      move_resolver.add_mapping(spilled_part, interval);
-    }
-    assign_spill_slot(spilled_part);
-
-    assert(spilled_part->from() == from_op_id && spilled_part->to() == to_op_id, "just checking");
-  }
+	if (interval_at(reg_num) == NULL) {
+		// if a phi function is never used, no interval is created -> ignore this
+		return;
+	}
+
+	Interval* interval = interval_at_block_begin(block, reg_num);
+	int reg = interval->assigned_reg();
+	int regHi = interval->assigned_regHi();
+
+	if ((reg < nof_regs && interval->always_in_memory()) ||
+			(use_fpu_stack_allocation() && reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg)) {
+		// the interval is split to get a short range that is located on the stack
+		// in the following two cases:
+		// * the interval started in memory (e.g. method parameter), but is currently in a register
+		//   this is an optimization for exception handling that reduces the number of moves that
+		//   are necessary for resolving the states when an exception uses this exception handler
+		// * the interval would be on the fpu stack at the beginning of the exception handler
+		//   this is not allowed because of the complicated fpu stack handling on Intel
+
+		// range that will be spilled to memory
+		int from_op_id = block->first_lir_instruction_id();
+		int to_op_id = from_op_id + 1;  // short live range of length 1
+		assert(interval->from() <= from_op_id && interval->to() >= to_op_id,
+				"no split allowed between exception entry and first instruction");
+
+		if (interval->from() != from_op_id) {
+			// the part before from_op_id is unchanged
+			interval = interval->split(from_op_id);
+			interval->assign_reg(reg, regHi);
+			append_interval(interval);
+		}
+		assert(interval->from() == from_op_id, "must be true now");
+
+		Interval* spilled_part = interval;
+		if (interval->to() != to_op_id) {
+			// the part after to_op_id is unchanged
+			spilled_part = interval->split_from_start(to_op_id);
+			append_interval(spilled_part);
+			move_resolver.add_mapping(spilled_part, interval);
+		}
+		assign_spill_slot(spilled_part);
+
+		assert(spilled_part->from() == from_op_id && spilled_part->to() == to_op_id, "just checking");
+	}
 }
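
The splitting above carves a one-op-long piece out of the interval at the handler entry and forces it onto a spill slot, leaving the parts before and after unchanged. A flat sketch of the three resulting ranges (ToyInterval and the op_id values are made-up illustration values, not the real split-child machinery):

#include <cstdio>

// Hypothetical flat range, only for illustrating how the exception-entry
// handling carves out a one-op-long piece that lives on the stack.
struct ToyInterval { int from, to; };

int main() {
  ToyInterval whole = { 10, 30 };
  int entry_id = 18;                        // first op_id of the exception handler

  ToyInterval before  = { whole.from, entry_id };      // keeps its register assignment
  ToyInterval spilled = { entry_id, entry_id + 1 };    // forced onto a spill slot
  ToyInterval after   = { entry_id + 1, whole.to };    // keeps the original allocation

  std::printf("before [%d,%d]  spilled [%d,%d]  after [%d,%d]\n",
              before.from, before.to, spilled.from, spilled.to, after.from, after.to);
  return 0;
}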
 
 void LinearScan::resolve_exception_entry(BlockBegin* block, MoveResolver &move_resolver) {
-  assert(block->is_set(BlockBegin::exception_entry_flag), "should not call otherwise");
-  DEBUG_ONLY(move_resolver.check_empty());
-
-  // visit all registers where the live_in bit is set
-  int size = live_set_size();
-  for (int r = (int)block->live_in().get_next_one_offset(0, size); r < size; r = (int)block->live_in().get_next_one_offset(r + 1, size)) {
-    resolve_exception_entry(block, r, move_resolver);
-  }
-
-  // the live_in bits are not set for phi functions of the xhandler entry, so iterate them separately
-  for_each_phi_fun(block, phi,
-    resolve_exception_entry(block, phi->operand()->vreg_number(), move_resolver)
-  );
-
-  if (move_resolver.has_mappings()) {
-    // insert moves after first instruction
-    move_resolver.set_insert_position(block->lir(), 1);
-    move_resolver.resolve_and_append_moves();
-  }
+	assert(block->is_set(BlockBegin::exception_entry_flag), "should not call otherwise");
+	DEBUG_ONLY(move_resolver.check_empty());
+
+	// visit all registers where the live_in bit is set
+	int size = live_set_size();
+	for (int r = (int)block->live_in().get_next_one_offset(0, size); r < size; r = (int)block->live_in().get_next_one_offset(r + 1, size)) {
+		resolve_exception_entry(block, r, move_resolver);
+	}
+
+	// the live_in bits are not set for phi functions of the xhandler entry, so iterate them separately
+	for_each_phi_fun(block, phi,
+			resolve_exception_entry(block, phi->operand()->vreg_number(), move_resolver)
+			);
+
+	if (move_resolver.has_mappings()) {
+		// insert moves after first instruction
+		move_resolver.set_insert_position(block->lir(), 1);
+		move_resolver.resolve_and_append_moves();
+	}
 }
 
 
 void LinearScan::resolve_exception_edge(XHandler* handler, int throwing_op_id, int reg_num, Phi* phi, MoveResolver &move_resolver) {
-  if (interval_at(reg_num) == NULL) {
-    // if a phi function is never used, no interval is created -> ignore this
-    return;
-  }
-
-  // the computation of to_interval is equal to resolve_collect_mappings,
-  // but from_interval is more complicated because of phi functions
-  BlockBegin* to_block = handler->entry_block();
-  Interval* to_interval = interval_at_block_begin(to_block, reg_num);
-
-  if (phi != NULL) {
-    // phi function of the exception entry block
-    // no moves are created for this phi function in the LIR_Generator, so the
-    // interval at the throwing instruction must be searched using the operands
-    // of the phi function
-    Value from_value = phi->operand_at(handler->phi_operand());
-
-    // with phi functions it can happen that the same from_value is used in
-    // multiple mappings, so notify move-resolver that this is allowed
-    move_resolver.set_multiple_reads_allowed();
-
-    Constant* con = from_value->as_Constant();
-    if (con != NULL && !con->is_pinned()) {
-      // unpinned constants may have no register, so add mapping from constant to interval
-      move_resolver.add_mapping(LIR_OprFact::value_type(con->type()), to_interval);
-    } else {
-      // search split child at the throwing op_id
-      Interval* from_interval = interval_at_op_id(from_value->operand()->vreg_number(), throwing_op_id);
-      move_resolver.add_mapping(from_interval, to_interval);
-    }
-
-  } else {
-    // no phi function, so use reg_num also for from_interval
-    // search split child at the throwing op_id
-    Interval* from_interval = interval_at_op_id(reg_num, throwing_op_id);
-    if (from_interval != to_interval) {
-      // optimization to reduce number of moves: when to_interval is on stack and
-      // the stack slot is known to be always correct, then no move is necessary
-      if (!from_interval->always_in_memory() || from_interval->canonical_spill_slot() != to_interval->assigned_reg()) {
-        move_resolver.add_mapping(from_interval, to_interval);
-      }
-    }
-  }
+	if (interval_at(reg_num) == NULL) {
+		// if a phi function is never used, no interval is created -> ignore this
+		return;
+	}
+
+	// the computation of to_interval is the same as in resolve_collect_mappings,
+	// but from_interval is more complicated because of phi functions
+	BlockBegin* to_block = handler->entry_block();
+	Interval* to_interval = interval_at_block_begin(to_block, reg_num);
+
+	if (phi != NULL) {
+		// phi function of the exception entry block
+		// no moves are created for this phi function in the LIR_Generator, so the
+		// interval at the throwing instruction must be searched using the operands
+		// of the phi function
+		Value from_value = phi->operand_at(handler->phi_operand());
+
+		// with phi functions it can happen that the same from_value is used in
+		// multiple mappings, so notify move-resolver that this is allowed
+		move_resolver.set_multiple_reads_allowed();
+
+		Constant* con = from_value->as_Constant();
+		if (con != NULL && !con->is_pinned()) {
+			// unpinned constants may have no register, so add mapping from constant to interval
+			move_resolver.add_mapping(LIR_OprFact::value_type(con->type()), to_interval);
+		} else {
+			// search split child at the throwing op_id
+			Interval* from_interval = interval_at_op_id(from_value->operand()->vreg_number(), throwing_op_id);
+			move_resolver.add_mapping(from_interval, to_interval);
+		}
+
+	} else {
+		// no phi function, so use reg_num also for from_interval
+		// search split child at the throwing op_id
+		Interval* from_interval = interval_at_op_id(reg_num, throwing_op_id);
+		if (from_interval != to_interval) {
+			// optimization to reduce number of moves: when to_interval is on stack and
+			// the stack slot is known to be always correct, then no move is necessary
+			if (!from_interval->always_in_memory() || from_interval->canonical_spill_slot() != to_interval->assigned_reg()) {
+				move_resolver.add_mapping(from_interval, to_interval);
+			}
+		}
+	}
 }
 
 void LinearScan::resolve_exception_edge(XHandler* handler, int throwing_op_id, MoveResolver &move_resolver) {
-  TRACE_LINEAR_SCAN(4, tty->print_cr("resolving exception handler B%d: throwing_op_id=%d", handler->entry_block()->block_id(), throwing_op_id));
-
-  DEBUG_ONLY(move_resolver.check_empty());
-  assert(handler->lir_op_id() == -1, "already processed this xhandler");
-  DEBUG_ONLY(handler->set_lir_op_id(throwing_op_id));
-  assert(handler->entry_code() == NULL, "code already present");
-
-  // visit all registers where the live_in bit is set
-  BlockBegin* block = handler->entry_block();
-  int size = live_set_size();
-  for (int r = (int)block->live_in().get_next_one_offset(0, size); r < size; r = (int)block->live_in().get_next_one_offset(r + 1, size)) {
-    resolve_exception_edge(handler, throwing_op_id, r, NULL, move_resolver);
-  }
-
-  // the live_in bits are not set for phi functions of the xhandler entry, so iterate them separately
-  for_each_phi_fun(block, phi,
-    resolve_exception_edge(handler, throwing_op_id, phi->operand()->vreg_number(), phi, move_resolver)
-  );
-
-  if (move_resolver.has_mappings()) {
-    LIR_List* entry_code = new LIR_List(compilation());
-    move_resolver.set_insert_position(entry_code, 0);
-    move_resolver.resolve_and_append_moves();
-
-    entry_code->jump(handler->entry_block());
-    handler->set_entry_code(entry_code);
-  }
+	TRACE_LINEAR_SCAN(4, tty->print_cr("resolving exception handler B%d: throwing_op_id=%d", handler->entry_block()->block_id(), throwing_op_id));
+
+	DEBUG_ONLY(move_resolver.check_empty());
+	assert(handler->lir_op_id() == -1, "already processed this xhandler");
+	DEBUG_ONLY(handler->set_lir_op_id(throwing_op_id));
+	assert(handler->entry_code() == NULL, "code already present");
+
+	// visit all registers where the live_in bit is set
+	BlockBegin* block = handler->entry_block();
+	int size = live_set_size();
+	for (int r = (int)block->live_in().get_next_one_offset(0, size); r < size; r = (int)block->live_in().get_next_one_offset(r + 1, size)) {
+		resolve_exception_edge(handler, throwing_op_id, r, NULL, move_resolver);
+	}
+
+	// the live_in bits are not set for phi functions of the xhandler entry, so iterate them separately
+	for_each_phi_fun(block, phi,
+			resolve_exception_edge(handler, throwing_op_id, phi->operand()->vreg_number(), phi, move_resolver)
+			);
+
+	if (move_resolver.has_mappings()) {
+		LIR_List* entry_code = new LIR_List(compilation());
+		move_resolver.set_insert_position(entry_code, 0);
+		move_resolver.resolve_and_append_moves();
+
+		entry_code->jump(handler->entry_block());
+		handler->set_entry_code(entry_code);
+	}
 }
 
 
 void LinearScan::resolve_exception_handlers() {
-  MoveResolver move_resolver(this);
-  LIR_OpVisitState visitor;
-  int num_blocks = block_count();
-
-  int i;
-  for (i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-    if (block->is_set(BlockBegin::exception_entry_flag)) {
-      resolve_exception_entry(block, move_resolver);
-    }
-  }
-
-  for (i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-    LIR_List* ops = block->lir();
-    int num_ops = ops->length();
-
-    // iterate all instructions of the block. skip the first because it is always a label
-    assert(visitor.no_operands(ops->at(0)), "first operation must always be a label");
-    for (int j = 1; j < num_ops; j++) {
-      LIR_Op* op = ops->at(j);
-      int op_id = op->id();
-
-      if (op_id != -1 && has_info(op_id)) {
-        // visit operation to collect all operands
-        visitor.visit(op);
-        assert(visitor.info_count() > 0, "should not visit otherwise");
-
-        XHandlers* xhandlers = visitor.all_xhandler();
-        int n = xhandlers->length();
-        for (int k = 0; k < n; k++) {
-          resolve_exception_edge(xhandlers->handler_at(k), op_id, move_resolver);
-        }
+	MoveResolver move_resolver(this);
+	LIR_OpVisitState visitor;
+	int num_blocks = block_count();
+
+	int i;
+	for (i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+		if (block->is_set(BlockBegin::exception_entry_flag)) {
+			resolve_exception_entry(block, move_resolver);
+		}
+	}
+
+	for (i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+		LIR_List* ops = block->lir();
+		int num_ops = ops->length();
+
+		// iterate all instructions of the block. skip the first because it is always a label
+		assert(visitor.no_operands(ops->at(0)), "first operation must always be a label");
+		for (int j = 1; j < num_ops; j++) {
+			LIR_Op* op = ops->at(j);
+			int op_id = op->id();
+
+			if (op_id != -1 && has_info(op_id)) {
+				// visit operation to collect all operands
+				visitor.visit(op);
+				assert(visitor.info_count() > 0, "should not visit otherwise");
+
+				XHandlers* xhandlers = visitor.all_xhandler();
+				int n = xhandlers->length();
+				for (int k = 0; k < n; k++) {
+					resolve_exception_edge(xhandlers->handler_at(k), op_id, move_resolver);
+				}
 
 #ifdef ASSERT
-      } else {
-        visitor.visit(op);
-        assert(visitor.all_xhandler()->length() == 0, "missed exception handler");
+			} else {
+				visitor.visit(op);
+				assert(visitor.all_xhandler()->length() == 0, "missed exception handler");
 #endif
-      }
-    }
-  }
+			}
+		}
+	}
 }
 
 
@@ -1970,177 +1971,181 @@
 // (includes computation of debug information and oop maps)
 
 VMReg LinearScan::vm_reg_for_interval(Interval* interval) {
-  VMReg reg = interval->cached_vm_reg();
-  if (!reg->is_valid() ) {
-    reg = vm_reg_for_operand(operand_for_interval(interval));
-    interval->set_cached_vm_reg(reg);
-  }
-  assert(reg == vm_reg_for_operand(operand_for_interval(interval)), "wrong cached value");
-  return reg;
+	VMReg reg = interval->cached_vm_reg();
+	if (!reg->is_valid()) {
+		reg = vm_reg_for_operand(operand_for_interval(interval));
+		interval->set_cached_vm_reg(reg);
+	}
+	assert(reg == vm_reg_for_operand(operand_for_interval(interval)), "wrong cached value");
+	return reg;
 }
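
vm_reg_for_interval (and operand_for_interval below) follow the same compute-and-cache pattern: compute once, store the result in the interval, and re-verify the cache against a fresh computation via assert in debug builds. A generic sketch of that pattern, with a stand-in expensive() function that is an assumption for illustration, not HotSpot code:

#include <cassert>
#include <cstdio>

// Generic compute-and-cache pattern: compute once, remember the result in the
// object, and re-check the cached value against a fresh computation.
struct Cached {
  int input;
  int cached;                              // -1 plays the role of "invalid"

  static int expensive(int x) { return x * x; }   // stand-in computation

  int get() {
    if (cached == -1) cached = expensive(input);
    assert(cached == expensive(input) && "wrong cached value");
    return cached;
  }
};

int main() {
  Cached c{7, -1};
  std::printf("%d %d\n", c.get(), c.get());  // computed once, then served from cache
  return 0;
}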
 
 VMReg LinearScan::vm_reg_for_operand(LIR_Opr opr) {
-  assert(opr->is_oop(), "currently only implemented for oop operands");
-  return frame_map()->regname(opr);
+	assert(opr->is_oop(), "currently only implemented for oop operands");
+	return frame_map()->regname(opr);
 }
 
 
 LIR_Opr LinearScan::operand_for_interval(Interval* interval) {
-  LIR_Opr opr = interval->cached_opr();
-  if (opr->is_illegal()) {
-    opr = calc_operand_for_interval(interval);
-    interval->set_cached_opr(opr);
-  }
-
-  assert(opr == calc_operand_for_interval(interval), "wrong cached value");
-  return opr;
+	LIR_Opr opr = interval->cached_opr();
+	if (opr->is_illegal()) {
+		opr = calc_operand_for_interval(interval);
+		interval->set_cached_opr(opr);
+	}
+
+	assert(opr == calc_operand_for_interval(interval), "wrong cached value");
+	return opr;
 }
 
 LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) {
-  int assigned_reg = interval->assigned_reg();
-  BasicType type = interval->type();
-
-  if (assigned_reg >= nof_regs) {
-    // stack slot
-    assert(interval->assigned_regHi() == any_reg, "must not have hi register");
-    return LIR_OprFact::stack(assigned_reg - nof_regs, type);
-
-  } else {
-    // register
-    switch (type) {
-      case T_OBJECT: {
-        assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");
-        assert(interval->assigned_regHi() == any_reg, "must not have hi register");
-        return LIR_OprFact::single_cpu_oop(assigned_reg);
-      }
-
-      case T_INT: {
-        assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");
-        assert(interval->assigned_regHi() == any_reg, "must not have hi register");
-        return LIR_OprFact::single_cpu(assigned_reg);
-      }
-
-      case T_LONG: {
-        int assigned_regHi = interval->assigned_regHi();
-        assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");
-        assert(num_physical_regs(T_LONG) == 1 ||
-               (assigned_regHi >= pd_first_cpu_reg && assigned_regHi <= pd_last_cpu_reg), "no cpu register");
-
-        assert(assigned_reg != assigned_regHi, "invalid allocation");
-        assert(num_physical_regs(T_LONG) == 1 || assigned_reg < assigned_regHi,
-               "register numbers must be sorted (ensure that e.g. a move from eax,ebx to ebx,eax can not occur)");
-        assert((assigned_regHi != any_reg) ^ (num_physical_regs(T_LONG) == 1), "must be match");
-        if (requires_adjacent_regs(T_LONG)) {
-          assert(assigned_reg % 2 == 0 && assigned_reg + 1 == assigned_regHi, "must be sequential and even");
-        }
+	int assigned_reg = interval->assigned_reg();
+	BasicType type = interval->type();
+
+	if (assigned_reg >= nof_regs) {
+		// stack slot
+		assert(interval->assigned_regHi() == any_reg, "must not have hi register");
+		return LIR_OprFact::stack(assigned_reg - nof_regs, type);
+
+	} else {
+		// register
+		int assigned_regHi = interval->assigned_regHi();
+		switch (type) {
+			case T_OBJECT: 
+				assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");
+				assert(interval->assigned_regHi() == any_reg, "must not have hi register");
+				return LIR_OprFact::single_cpu_oop(assigned_reg);
+
+
+			case T_INT: 
+				assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");
+				assert(interval->assigned_regHi() == any_reg, "must not have hi register");
+				return LIR_OprFact::single_cpu(assigned_reg);
+
+
+			case T_LONG: 
+				assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register");
+				assert(num_physical_regs(T_LONG) == 1 ||
+						(assigned_regHi >= pd_first_cpu_reg && assigned_regHi <= pd_last_cpu_reg), "no cpu register");
+
+				assert(assigned_reg != assigned_regHi, "invalid allocation");
+				assert(num_physical_regs(T_LONG) == 1 || assigned_reg < assigned_regHi,
+						"register numbers must be sorted (ensure that e.g. a move from eax,ebx to ebx,eax can not occur)");
+				assert((assigned_regHi != any_reg) ^ (num_physical_regs(T_LONG) == 1), "must match");
+				if (requires_adjacent_regs(T_LONG)) {
+					assert(assigned_reg % 2 == 0 && assigned_reg + 1 == assigned_regHi, "must be sequential and even");
+				}
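+				// On LP64 the long occupies a single register, so both arguments of
+				// LIR_OprFact::double_cpu below name assigned_reg; on 32-bit targets
+				// the pair (assigned_reg, assigned_regHi) is encoded, with SPARC
+				// passing the two halves in the opposite argument order.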
 
 #ifdef _LP64
-        return LIR_OprFact::double_cpu(assigned_reg, assigned_reg);
+				return LIR_OprFact::double_cpu(assigned_reg, assigned_reg);
 #else
 #ifdef SPARC
-        return LIR_OprFact::double_cpu(assigned_regHi, assigned_reg);
+				return LIR_OprFact::double_cpu(assigned_regHi, assigned_reg);
 #else
-        return LIR_OprFact::double_cpu(assigned_reg, assigned_regHi);
+				return LIR_OprFact::double_cpu(assigned_reg, assigned_regHi);
 #endif // SPARC
 #endif // LP64
-      }
-
-      case T_FLOAT: {
+
+
+			case T_FLOAT: 
 #ifdef X86
-        if (UseSSE >= 1) {
-          assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= pd_last_xmm_reg, "no xmm register");
-          assert(interval->assigned_regHi() == any_reg, "must not have hi register");
-          return LIR_OprFact::single_xmm(assigned_reg - pd_first_xmm_reg);
-        }
+				if (UseSSE >= 1) {
+					assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= pd_last_xmm_reg, "no xmm register");
+					assert(interval->assigned_regHi() == any_reg, "must not have hi register");
+					return LIR_OprFact::single_xmm(assigned_reg - pd_first_xmm_reg);
+				}
 #endif
 
-        assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
-        assert(interval->assigned_regHi() == any_reg, "must not have hi register");
-        return LIR_OprFact::single_fpu(assigned_reg - pd_first_fpu_reg);
-      }
-
-      case T_DOUBLE: {
+				assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
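+				// MIPS32: the hi-register check below is compiled out, presumably
+				// because the port's FPU allocator may record a second register for
+				// the value (assumption; the other platforms still require any_reg).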
+#ifndef MIPS32
+				assert(interval->assigned_regHi() == any_reg, "must not have hi register");
+#endif
+				return LIR_OprFact::single_fpu(assigned_reg - pd_first_fpu_reg);
+
+
+			case T_DOUBLE: 
 #ifdef X86
-        if (UseSSE >= 2) {
-          assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= pd_last_xmm_reg, "no xmm register");
-          assert(interval->assigned_regHi() == any_reg, "must not have hi register (double xmm values are stored in one register)");
-          return LIR_OprFact::double_xmm(assigned_reg - pd_first_xmm_reg);
-        }
+				if (UseSSE >= 2) {
+					assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= pd_last_xmm_reg, "no xmm register");
+					assert(interval->assigned_regHi() == any_reg, "must not have hi register (double xmm values are stored in one register)");
+					return LIR_OprFact::double_xmm(assigned_reg - pd_first_xmm_reg);
+				}
 #endif
 
 #ifdef SPARC
-        assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
-        assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register");
-        assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even");
-        LIR_Opr result = LIR_OprFact::double_fpu(interval->assigned_regHi() - pd_first_fpu_reg, assigned_reg - pd_first_fpu_reg);
+				assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
+				assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register");
+				assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even");
+				return LIR_OprFact::double_fpu(interval->assigned_regHi() - pd_first_fpu_reg, assigned_reg - pd_first_fpu_reg);
 #else
-        assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
-        assert(interval->assigned_regHi() == any_reg, "must not have hi register (double fpu values are stored in one register on Intel)");
-        LIR_Opr result = LIR_OprFact::double_fpu(assigned_reg - pd_first_fpu_reg);
+				assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register");
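+				// As with T_FLOAT above, MIPS32 skips the hi-register check; the
+				// generic single-register double_fpu form below is used unchanged.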
+#ifndef MIPS32
+				assert(interval->assigned_regHi() == any_reg, "must not have hi register (double fpu values are stored in one register on Intel)");
 #endif
-        return result;
-      }
-
-      default: {
-        ShouldNotReachHere();
-        return LIR_OprFact::illegalOpr;
-      }
-    }
-  }
+				return LIR_OprFact::double_fpu(assigned_reg - pd_first_fpu_reg);
+#endif
+
+
+			default: 
+				ShouldNotReachHere();
+				return LIR_OprFact::illegalOpr;
+
+		}
+	}
 }
 
 LIR_Opr LinearScan::canonical_spill_opr(Interval* interval) {
-  assert(interval->canonical_spill_slot() >= nof_regs, "canonical spill slot not set");
-  return LIR_OprFact::stack(interval->canonical_spill_slot() - nof_regs, interval->type());
+	assert(interval->canonical_spill_slot() >= nof_regs, "canonical spill slot not set");
+	return LIR_OprFact::stack(interval->canonical_spill_slot() - nof_regs, interval->type());
 }
 
 LIR_Opr LinearScan::color_lir_opr(LIR_Opr opr, int op_id, LIR_OpVisitState::OprMode mode) {
-  assert(opr->is_virtual(), "should not call this otherwise");
-
-  Interval* interval = interval_at(opr->vreg_number());
-  assert(interval != NULL, "interval must exist");
-
-  if (op_id != -1) {
+	assert(opr->is_virtual(), "should not call this otherwise");
+
+	Interval* interval = interval_at(opr->vreg_number());
+	assert(interval != NULL, "interval must exist");
+
+	if (op_id != -1) {
 #ifdef ASSERT
-    BlockBegin* block = block_of_op_with_id(op_id);
-    if (block->number_of_sux() <= 1 && op_id == block->last_lir_instruction_id()) {
-      // check if spill moves could have been appended at the end of this block, but
-      // before the branch instruction. So the split child information for this branch would
-      // be incorrect.
-      LIR_OpBranch* branch = block->lir()->instructions_list()->last()->as_OpBranch();
-      if (branch != NULL) {
-        if (block->live_out().at(opr->vreg_number())) {
-          assert(branch->cond() == lir_cond_always, "block does not end with an unconditional jump");
-          assert(false, "can't get split child for the last branch of a block because the information would be incorrect (moves are inserted before the branch in resolve_data_flow)");
-        }
-      }
-    }
+		BlockBegin* block = block_of_op_with_id(op_id);
+		if (block->number_of_sux() <= 1 && op_id == block->last_lir_instruction_id()) {
+			// Check whether spill moves could have been appended at the end of this block,
+			// but before the branch instruction; in that case the split child information
+			// for this branch would be incorrect.
+			LIR_OpBranch* branch = block->lir()->instructions_list()->last()->as_OpBranch();
+			if (branch != NULL) {
+				if (block->live_out().at(opr->vreg_number())) {
+					assert(branch->cond() == lir_cond_always, "block does not end with an unconditional jump");
+					assert(false, "can't get split child for the last branch of a block because the information would be incorrect (moves are inserted before the branch in resolve_data_flow)");
+				}
+			}
+		}
 #endif
 
-    // operands are not changed when an interval is split during allocation,
-    // so search the right interval here
-    interval = split_child_at_op_id(interval, op_id, mode);
-  }
-
-  LIR_Opr res = operand_for_interval(interval);
+		// operands are not changed when an interval is split during allocation,
+		// so search the right interval here
+		interval = split_child_at_op_id(interval, op_id, mode);
+	}
+
+	LIR_Opr res = operand_for_interval(interval);
 
 #ifdef X86
-  // new semantic for is_last_use: not only set on definite end of interval,
-  // but also before hole
-  // This may still miss some cases (e.g. for dead values), but it is not necessary that the
-  // last use information is completely correct
-  // information is only needed for fpu stack allocation
-  if (res->is_fpu_register()) {
-    if (opr->is_last_use() || op_id == interval->to() || (op_id != -1 && interval->has_hole_between(op_id, op_id + 1))) {
-      assert(op_id == -1 || !is_block_begin(op_id), "holes at begin of block may also result from control flow");
-      res = res->make_last_use();
-    }
-  }
+	// new semantic for is_last_use: not only set on definite end of interval,
+	// but also before hole
+	// This may still miss some cases (e.g. for dead values), but it is not necessary that the
+	// last use information is completely correct
+	// information is only needed for fpu stack allocation
+	if (res->is_fpu_register()) {
+		if (opr->is_last_use() || op_id == interval->to() || (op_id != -1 && interval->has_hole_between(op_id, op_id + 1))) {
+			assert(op_id == -1 || !is_block_begin(op_id), "holes at begin of block may also result from control flow");
+			res = res->make_last_use();
+		}
+	}
 #endif
 
-  assert(!gen()->is_vreg_flag_set(opr->vreg_number(), LIRGenerator::callee_saved) || !FrameMap::is_caller_save_register(res), "bad allocation");
-
-  return res;
+#if 0
+	assert(!gen()->is_vreg_flag_set(opr->vreg_number(), LIRGenerator::callee_saved) || !FrameMap::is_caller_save_register(res), "bad allocation");
+#endif
+	return res;
 }
 
 
@@ -2148,252 +2153,252 @@
 // some methods used to check correctness of debug information
 
 void assert_no_register_values(GrowableArray<ScopeValue*>* values) {
-  if (values == NULL) {
-    return;
-  }
-
-  for (int i = 0; i < values->length(); i++) {
-    ScopeValue* value = values->at(i);
-
-    if (value->is_location()) {
-      Location location = ((LocationValue*)value)->location();
-      assert(location.where() == Location::on_stack, "value is in register");
-    }
-  }
+	if (values == NULL) {
+		return;
+	}
+
+	for (int i = 0; i < values->length(); i++) {
+		ScopeValue* value = values->at(i);
+
+		if (value->is_location()) {
+			Location location = ((LocationValue*)value)->location();
+			assert(location.where() == Location::on_stack, "value is in register");
+		}
+	}
 }
 
 void assert_no_register_values(GrowableArray<MonitorValue*>* values) {
-  if (values == NULL) {
-    return;
-  }
-
-  for (int i = 0; i < values->length(); i++) {
-    MonitorValue* value = values->at(i);
-
-    if (value->owner()->is_location()) {
-      Location location = ((LocationValue*)value->owner())->location();
-      assert(location.where() == Location::on_stack, "owner is in register");
-    }
-    assert(value->basic_lock().where() == Location::on_stack, "basic_lock is in register");
-  }
+	if (values == NULL) {
+		return;
+	}
+
+	for (int i = 0; i < values->length(); i++) {
+		MonitorValue* value = values->at(i);
+
+		if (value->owner()->is_location()) {
+			Location location = ((LocationValue*)value->owner())->location();
+			assert(location.where() == Location::on_stack, "owner is in register");
+		}
+		assert(value->basic_lock().where() == Location::on_stack, "basic_lock is in register");
+	}
 }
 
 void assert_equal(Location l1, Location l2) {
-  assert(l1.where() == l2.where() && l1.type() == l2.type() && l1.offset() == l2.offset(), "");
+	assert(l1.where() == l2.where() && l1.type() == l2.type() && l1.offset() == l2.offset(), "");
 }
 
 void assert_equal(ScopeValue* v1, ScopeValue* v2) {
-  if (v1->is_location()) {
-    assert(v2->is_location(), "");
-    assert_equal(((LocationValue*)v1)->location(), ((LocationValue*)v2)->location());
-  } else if (v1->is_constant_int()) {
-    assert(v2->is_constant_int(), "");
-    assert(((ConstantIntValue*)v1)->value() == ((ConstantIntValue*)v2)->value(), "");
-  } else if (v1->is_constant_double()) {
-    assert(v2->is_constant_double(), "");
-    assert(((ConstantDoubleValue*)v1)->value() == ((ConstantDoubleValue*)v2)->value(), "");
-  } else if (v1->is_constant_long()) {
-    assert(v2->is_constant_long(), "");
-    assert(((ConstantLongValue*)v1)->value() == ((ConstantLongValue*)v2)->value(), "");
-  } else if (v1->is_constant_oop()) {
-    assert(v2->is_constant_oop(), "");
-    assert(((ConstantOopWriteValue*)v1)->value() == ((ConstantOopWriteValue*)v2)->value(), "");
-  } else {
-    ShouldNotReachHere();
-  }
+	if (v1->is_location()) {
+		assert(v2->is_location(), "");
+		assert_equal(((LocationValue*)v1)->location(), ((LocationValue*)v2)->location());
+	} else if (v1->is_constant_int()) {
+		assert(v2->is_constant_int(), "");
+		assert(((ConstantIntValue*)v1)->value() == ((ConstantIntValue*)v2)->value(), "");
+	} else if (v1->is_constant_double()) {
+		assert(v2->is_constant_double(), "");
+		assert(((ConstantDoubleValue*)v1)->value() == ((ConstantDoubleValue*)v2)->value(), "");
+	} else if (v1->is_constant_long()) {
+		assert(v2->is_constant_long(), "");
+		assert(((ConstantLongValue*)v1)->value() == ((ConstantLongValue*)v2)->value(), "");
+	} else if (v1->is_constant_oop()) {
+		assert(v2->is_constant_oop(), "");
+		assert(((ConstantOopWriteValue*)v1)->value() == ((ConstantOopWriteValue*)v2)->value(), "");
+	} else {
+		ShouldNotReachHere();
+	}
 }
 
 void assert_equal(MonitorValue* m1, MonitorValue* m2) {
-  assert_equal(m1->owner(), m2->owner());
-  assert_equal(m1->basic_lock(), m2->basic_lock());
+	assert_equal(m1->owner(), m2->owner());
+	assert_equal(m1->basic_lock(), m2->basic_lock());
 }
 
 void assert_equal(IRScopeDebugInfo* d1, IRScopeDebugInfo* d2) {
-  assert(d1->scope() == d2->scope(), "not equal");
-  assert(d1->bci() == d2->bci(), "not equal");
-
-  if (d1->locals() != NULL) {
-    assert(d1->locals() != NULL && d2->locals() != NULL, "not equal");
-    assert(d1->locals()->length() == d2->locals()->length(), "not equal");
-    for (int i = 0; i < d1->locals()->length(); i++) {
-      assert_equal(d1->locals()->at(i), d2->locals()->at(i));
-    }
-  } else {
-    assert(d1->locals() == NULL && d2->locals() == NULL, "not equal");
-  }
-
-  if (d1->expressions() != NULL) {
-    assert(d1->expressions() != NULL && d2->expressions() != NULL, "not equal");
-    assert(d1->expressions()->length() == d2->expressions()->length(), "not equal");
-    for (int i = 0; i < d1->expressions()->length(); i++) {
-      assert_equal(d1->expressions()->at(i), d2->expressions()->at(i));
-    }
-  } else {
-    assert(d1->expressions() == NULL && d2->expressions() == NULL, "not equal");
-  }
-
-  if (d1->monitors() != NULL) {
-    assert(d1->monitors() != NULL && d2->monitors() != NULL, "not equal");
-    assert(d1->monitors()->length() == d2->monitors()->length(), "not equal");
-    for (int i = 0; i < d1->monitors()->length(); i++) {
-      assert_equal(d1->monitors()->at(i), d2->monitors()->at(i));
-    }
-  } else {
-    assert(d1->monitors() == NULL && d2->monitors() == NULL, "not equal");
-  }
-
-  if (d1->caller() != NULL) {
-    assert(d1->caller() != NULL && d2->caller() != NULL, "not equal");
-    assert_equal(d1->caller(), d2->caller());
-  } else {
-    assert(d1->caller() == NULL && d2->caller() == NULL, "not equal");
-  }
+	assert(d1->scope() == d2->scope(), "not equal");
+	assert(d1->bci() == d2->bci(), "not equal");
+
+	if (d1->locals() != NULL) {
+		assert(d1->locals() != NULL && d2->locals() != NULL, "not equal");
+		assert(d1->locals()->length() == d2->locals()->length(), "not equal");
+		for (int i = 0; i < d1->locals()->length(); i++) {
+			assert_equal(d1->locals()->at(i), d2->locals()->at(i));
+		}
+	} else {
+		assert(d1->locals() == NULL && d2->locals() == NULL, "not equal");
+	}
+
+	if (d1->expressions() != NULL) {
+		assert(d1->expressions() != NULL && d2->expressions() != NULL, "not equal");
+		assert(d1->expressions()->length() == d2->expressions()->length(), "not equal");
+		for (int i = 0; i < d1->expressions()->length(); i++) {
+			assert_equal(d1->expressions()->at(i), d2->expressions()->at(i));
+		}
+	} else {
+		assert(d1->expressions() == NULL && d2->expressions() == NULL, "not equal");
+	}
+
+	if (d1->monitors() != NULL) {
+		assert(d1->monitors() != NULL && d2->monitors() != NULL, "not equal");
+		assert(d1->monitors()->length() == d2->monitors()->length(), "not equal");
+		for (int i = 0; i < d1->monitors()->length(); i++) {
+			assert_equal(d1->monitors()->at(i), d2->monitors()->at(i));
+		}
+	} else {
+		assert(d1->monitors() == NULL && d2->monitors() == NULL, "not equal");
+	}
+
+	if (d1->caller() != NULL) {
+		assert(d1->caller() != NULL && d2->caller() != NULL, "not equal");
+		assert_equal(d1->caller(), d2->caller());
+	} else {
+		assert(d1->caller() == NULL && d2->caller() == NULL, "not equal");
+	}
 }
 
 void check_stack_depth(CodeEmitInfo* info, int stack_end) {
-  if (info->bci() != SynchronizationEntryBCI && !info->scope()->method()->is_native()) {
-    Bytecodes::Code code = info->scope()->method()->java_code_at_bci(info->bci());
-    switch (code) {
-      case Bytecodes::_ifnull    : // fall through
-      case Bytecodes::_ifnonnull : // fall through
-      case Bytecodes::_ifeq      : // fall through
-      case Bytecodes::_ifne      : // fall through
-      case Bytecodes::_iflt      : // fall through
-      case Bytecodes::_ifge      : // fall through
-      case Bytecodes::_ifgt      : // fall through
-      case Bytecodes::_ifle      : // fall through
-      case Bytecodes::_if_icmpeq : // fall through
-      case Bytecodes::_if_icmpne : // fall through
-      case Bytecodes::_if_icmplt : // fall through
-      case Bytecodes::_if_icmpge : // fall through
-      case Bytecodes::_if_icmpgt : // fall through
-      case Bytecodes::_if_icmple : // fall through
-      case Bytecodes::_if_acmpeq : // fall through
-      case Bytecodes::_if_acmpne :
-        assert(stack_end >= -Bytecodes::depth(code), "must have non-empty expression stack at if bytecode");
-        break;
-    }
-  }
+	if (info->bci() != SynchronizationEntryBCI && !info->scope()->method()->is_native()) {
+		Bytecodes::Code code = info->scope()->method()->java_code_at_bci(info->bci());
+		switch (code) {
+			case Bytecodes::_ifnull    : // fall through
+			case Bytecodes::_ifnonnull : // fall through
+			case Bytecodes::_ifeq      : // fall through
+			case Bytecodes::_ifne      : // fall through
+			case Bytecodes::_iflt      : // fall through
+			case Bytecodes::_ifge      : // fall through
+			case Bytecodes::_ifgt      : // fall through
+			case Bytecodes::_ifle      : // fall through
+			case Bytecodes::_if_icmpeq : // fall through
+			case Bytecodes::_if_icmpne : // fall through
+			case Bytecodes::_if_icmplt : // fall through
+			case Bytecodes::_if_icmpge : // fall through
+			case Bytecodes::_if_icmpgt : // fall through
+			case Bytecodes::_if_icmple : // fall through
+			case Bytecodes::_if_acmpeq : // fall through
+			case Bytecodes::_if_acmpne :
+				assert(stack_end >= -Bytecodes::depth(code), "must have non-empty expression stack at if bytecode");
+				break;
+		}
+	}
 }
 
 #endif // ASSERT
 
 
 IntervalWalker* LinearScan::init_compute_oop_maps() {
-  // setup lists of potential oops for walking
-  Interval* oop_intervals;
-  Interval* non_oop_intervals;
-
-  create_unhandled_lists(&oop_intervals, &non_oop_intervals, is_oop_interval, NULL);
-
-  // intervals that have no oops inside need not to be processed
-  // to ensure a walking until the last instruction id, add a dummy interval
-  // with a high operation id
-  non_oop_intervals = new Interval(any_reg);
-  non_oop_intervals->add_range(max_jint - 2, max_jint - 1);
-
-  return new IntervalWalker(this, oop_intervals, non_oop_intervals);
+	// setup lists of potential oops for walking
+	Interval* oop_intervals;
+	Interval* non_oop_intervals;
+
+	create_unhandled_lists(&oop_intervals, &non_oop_intervals, is_oop_interval, NULL);
+
+	// intervals that have no oops inside need not be processed.
+	// To ensure walking until the last instruction id, add a dummy interval
+	// with a high operation id.
+	non_oop_intervals = new Interval(any_reg);
+	non_oop_intervals->add_range(max_jint - 2, max_jint - 1);
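+	// max_jint - 2 .. max_jint - 1 lies beyond any real op_id, so the walker is
+	// guaranteed to run to the last instruction.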
+
+	return new IntervalWalker(this, oop_intervals, non_oop_intervals);
 }
 
 
 OopMap* LinearScan::compute_oop_map(IntervalWalker* iw, LIR_Op* op, CodeEmitInfo* info, bool is_call_site) {
-  TRACE_LINEAR_SCAN(3, tty->print_cr("creating oop map at op_id %d", op->id()));
-
-  // walk before the current operation -> intervals that start at
-  // the operation (= output operands of the operation) are not
-  // included in the oop map
-  iw->walk_before(op->id());
-
-  int frame_size = frame_map()->framesize();
-  int arg_count = frame_map()->oop_map_arg_count();
-  OopMap* map = new OopMap(frame_size, arg_count);
-
-  // Check if this is a patch site.
-  bool is_patch_info = false;
-  if (op->code() == lir_move) {
-    assert(!is_call_site, "move must not be a call site");
-    assert(op->as_Op1() != NULL, "move must be LIR_Op1");
-    LIR_Op1* move = (LIR_Op1*)op;
-
-    is_patch_info = move->patch_code() != lir_patch_none;
-  }
-
-  // Iterate through active intervals
-  for (Interval* interval = iw->active_first(fixedKind); interval != Interval::end(); interval = interval->next()) {
-    int assigned_reg = interval->assigned_reg();
-
-    assert(interval->current_from() <= op->id() && op->id() <= interval->current_to(), "interval should not be active otherwise");
-    assert(interval->assigned_regHi() == any_reg, "oop must be single word");
-    assert(interval->reg_num() >= LIR_OprDesc::vreg_base, "fixed interval found");
-
-    // Check if this range covers the instruction. Intervals that
-    // start or end at the current operation are not included in the
-    // oop map, except in the case of patching moves.  For patching
-    // moves, any intervals which end at this instruction are included
-    // in the oop map since we may safepoint while doing the patch
-    // before we've consumed the inputs.
-    if (is_patch_info || op->id() < interval->current_to()) {
-
-      // caller-save registers must not be included into oop-maps at calls
-      assert(!is_call_site || assigned_reg >= nof_regs || !is_caller_save(assigned_reg), "interval is in a caller-save register at a call -> register will be overwritten");
-
-      VMReg name = vm_reg_for_interval(interval);
-      map->set_oop(name);
-
-      // Spill optimization: when the stack value is guaranteed to be always correct,
-      // then it must be added to the oop map even if the interval is currently in a register
-      if (interval->always_in_memory() &&
-          op->id() > interval->spill_definition_pos() &&
-          interval->assigned_reg() != interval->canonical_spill_slot()) {
-        assert(interval->spill_definition_pos() > 0, "position not set correctly");
-        assert(interval->canonical_spill_slot() >= LinearScan::nof_regs, "no spill slot assigned");
-        assert(interval->assigned_reg() < LinearScan::nof_regs, "interval is on stack, so stack slot is registered twice");
-
-        map->set_oop(frame_map()->slot_regname(interval->canonical_spill_slot() - LinearScan::nof_regs));
-      }
-    }
-  }
-
-  // add oops from lock stack
-  assert(info->stack() != NULL, "CodeEmitInfo must always have a stack");
-  int locks_count = info->stack()->locks_size();
-  for (int i = 0; i < locks_count; i++) {
-    map->set_oop(frame_map()->monitor_object_regname(i));
-  }
-
-  return map;
+	TRACE_LINEAR_SCAN(3, tty->print_cr("creating oop map at op_id %d", op->id()));
+
+	// walk before the current operation -> intervals that start at
+	// the operation (= output operands of the operation) are not
+	// included in the oop map
+	iw->walk_before(op->id());
+
+	int frame_size = frame_map()->framesize();
+	int arg_count = frame_map()->oop_map_arg_count();
+	OopMap* map = new OopMap(frame_size, arg_count);
+
+	// Check if this is a patch site.
+	bool is_patch_info = false;
+	if (op->code() == lir_move) {
+		assert(!is_call_site, "move must not be a call site");
+		assert(op->as_Op1() != NULL, "move must be LIR_Op1");
+		LIR_Op1* move = (LIR_Op1*)op;
+
+		is_patch_info = move->patch_code() != lir_patch_none;
+	}
+
+	// Iterate through active intervals
+	for (Interval* interval = iw->active_first(fixedKind); interval != Interval::end(); interval = interval->next()) {
+		int assigned_reg = interval->assigned_reg();
+
+		assert(interval->current_from() <= op->id() && op->id() <= interval->current_to(), "interval should not be active otherwise");
+		assert(interval->assigned_regHi() == any_reg, "oop must be single word");
+		assert(interval->reg_num() >= LIR_OprDesc::vreg_base, "fixed interval found");
+
+		// Check if this range covers the instruction. Intervals that
+		// start or end at the current operation are not included in the
+		// oop map, except in the case of patching moves.  For patching
+		// moves, any intervals which end at this instruction are included
+		// in the oop map since we may safepoint while doing the patch
+		// before we've consumed the inputs.
+		if (is_patch_info || op->id() < interval->current_to()) {
+
+			// caller-save registers must not be included into oop-maps at calls
+			assert(!is_call_site || assigned_reg >= nof_regs || !is_caller_save(assigned_reg), "interval is in a caller-save register at a call -> register will be overwritten");
+
+			VMReg name = vm_reg_for_interval(interval);
+			map->set_oop(name);
+
+			// Spill optimization: when the stack value is guaranteed to be always correct,
+			// then it must be added to the oop map even if the interval is currently in a register
+			if (interval->always_in_memory() &&
+					op->id() > interval->spill_definition_pos() &&
+					interval->assigned_reg() != interval->canonical_spill_slot()) {
+				assert(interval->spill_definition_pos() > 0, "position not set correctly");
+				assert(interval->canonical_spill_slot() >= LinearScan::nof_regs, "no spill slot assigned");
+				assert(interval->assigned_reg() < LinearScan::nof_regs, "interval is on stack, so stack slot is registered twice");
+
+				map->set_oop(frame_map()->slot_regname(interval->canonical_spill_slot() - LinearScan::nof_regs));
+			}
+		}
+	}
+
+	// add oops from lock stack
+	assert(info->stack() != NULL, "CodeEmitInfo must always have a stack");
+	int locks_count = info->stack()->locks_size();
+	for (int i = 0; i < locks_count; i++) {
+		map->set_oop(frame_map()->monitor_object_regname(i));
+	}
+
+	return map;
 }
 
 
 void LinearScan::compute_oop_map(IntervalWalker* iw, const LIR_OpVisitState &visitor, LIR_Op* op) {
-  assert(visitor.info_count() > 0, "no oop map needed");
-
-  // compute oop_map only for first CodeEmitInfo
-  // because it is (in most cases) equal for all other infos of the same operation
-  CodeEmitInfo* first_info = visitor.info_at(0);
-  OopMap* first_oop_map = compute_oop_map(iw, op, first_info, visitor.has_call());
-
-  for (int i = 0; i < visitor.info_count(); i++) {
-    CodeEmitInfo* info = visitor.info_at(i);
-    OopMap* oop_map = first_oop_map;
-
-    if (info->stack()->locks_size() != first_info->stack()->locks_size()) {
-      // this info has a different number of locks then the precomputed oop map
-      // (possible for lock and unlock instructions) -> compute oop map with
-      // correct lock information
-      oop_map = compute_oop_map(iw, op, info, visitor.has_call());
-    }
-
-    if (info->_oop_map == NULL) {
-      info->_oop_map = oop_map;
-    } else {
-      // a CodeEmitInfo can not be shared between different LIR-instructions
-      // because interval splitting can occur anywhere between two instructions
-      // and so the oop maps must be different
-      // -> check if the already set oop_map is exactly the one calculated for this operation
-      assert(info->_oop_map == oop_map, "same CodeEmitInfo used for multiple LIR instructions");
-    }
-  }
+	assert(visitor.info_count() > 0, "no oop map needed");
+
+	// compute oop_map only for first CodeEmitInfo
+	// because it is (in most cases) equal for all other infos of the same operation
+	CodeEmitInfo* first_info = visitor.info_at(0);
+	OopMap* first_oop_map = compute_oop_map(iw, op, first_info, visitor.has_call());
+
+	for (int i = 0; i < visitor.info_count(); i++) {
+		CodeEmitInfo* info = visitor.info_at(i);
+		OopMap* oop_map = first_oop_map;
+
+		if (info->stack()->locks_size() != first_info->stack()->locks_size()) {
+			// this info has a different number of locks than the precomputed oop map
+			// (possible for lock and unlock instructions) -> compute oop map with
+			// correct lock information
+			oop_map = compute_oop_map(iw, op, info, visitor.has_call());
+		}
+
+		if (info->_oop_map == NULL) {
+			info->_oop_map = oop_map;
+		} else {
+			// a CodeEmitInfo can not be shared between different LIR-instructions
+			// because interval splitting can occur anywhere between two instructions
+			// and so the oop maps must be different
+			// -> check if the already set oop_map is exactly the one calculated for this operation
+			assert(info->_oop_map == oop_map, "same CodeEmitInfo used for multiple LIR instructions");
+		}
+	}
 }
 
 
@@ -2406,585 +2411,585 @@
 LocationValue         _illegal_value = LocationValue(Location());
 
 void LinearScan::init_compute_debug_info() {
-  // cache for frequently used scope values
-  // (cpu registers and stack slots)
-  _scope_value_cache = ScopeValueArray((LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2, NULL);
+	// cache for frequently used scope values
+	// (cpu registers and stack slots)
+	_scope_value_cache = ScopeValueArray((LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2, NULL);
 }
 
 MonitorValue* LinearScan::location_for_monitor_index(int monitor_index) {
-  Location loc;
-  if (!frame_map()->location_for_monitor_object(monitor_index, &loc)) {
-    bailout("too large frame");
-  }
-  ScopeValue* object_scope_value = new LocationValue(loc);
-
-  if (!frame_map()->location_for_monitor_lock(monitor_index, &loc)) {
-    bailout("too large frame");
-  }
-  return new MonitorValue(object_scope_value, loc);
+	Location loc;
+	if (!frame_map()->location_for_monitor_object(monitor_index, &loc)) {
+		bailout("too large frame");
+	}
+	ScopeValue* object_scope_value = new LocationValue(loc);
+
+	if (!frame_map()->location_for_monitor_lock(monitor_index, &loc)) {
+		bailout("too large frame");
+	}
+	return new MonitorValue(object_scope_value, loc);
 }
 
 LocationValue* LinearScan::location_for_name(int name, Location::Type loc_type) {
-  Location loc;
-  if (!frame_map()->locations_for_slot(name, loc_type, &loc)) {
-    bailout("too large frame");
-  }
-  return new LocationValue(loc);
+	Location loc;
+	if (!frame_map()->locations_for_slot(name, loc_type, &loc)) {
+		bailout("too large frame");
+	}
+	return new LocationValue(loc);
 }
 
 
 int LinearScan::append_scope_value_for_constant(LIR_Opr opr, GrowableArray<ScopeValue*>* scope_values) {
-  assert(opr->is_constant(), "should not be called otherwise");
-
-  LIR_Const* c = opr->as_constant_ptr();
-  BasicType t = c->type();
-  switch (t) {
-    case T_OBJECT: {
-      jobject value = c->as_jobject();
-      if (value == NULL) {
-        scope_values->append(&_oop_null_scope_value);
-      } else {
-        scope_values->append(new ConstantOopWriteValue(c->as_jobject()));
-      }
-      return 1;
-    }
-
-    case T_INT: // fall through
-    case T_FLOAT: {
-      int value = c->as_jint_bits();
-      switch (value) {
-        case -1: scope_values->append(&_int_m1_scope_value); break;
-        case 0:  scope_values->append(&_int_0_scope_value); break;
-        case 1:  scope_values->append(&_int_1_scope_value); break;
-        case 2:  scope_values->append(&_int_2_scope_value); break;
-        default: scope_values->append(new ConstantIntValue(c->as_jint_bits())); break;
-      }
-      return 1;
-    }
-
-    case T_LONG: // fall through
-    case T_DOUBLE: {
-      if (hi_word_offset_in_bytes > lo_word_offset_in_bytes) {
-        scope_values->append(new ConstantIntValue(c->as_jint_hi_bits()));
-        scope_values->append(new ConstantIntValue(c->as_jint_lo_bits()));
-      } else {
-        scope_values->append(new ConstantIntValue(c->as_jint_lo_bits()));
-        scope_values->append(new ConstantIntValue(c->as_jint_hi_bits()));
-      }
-
-      return 2;
-    }
-
-    default:
-      ShouldNotReachHere();
-      return -1;
-  }
+	assert(opr->is_constant(), "should not be called otherwise");
+
+	LIR_Const* c = opr->as_constant_ptr();
+	BasicType t = c->type();
+	switch (t) {
+		case T_OBJECT: {
+			jobject value = c->as_jobject();
+			if (value == NULL) {
+				scope_values->append(&_oop_null_scope_value);
+			} else {
+				scope_values->append(new ConstantOopWriteValue(c->as_jobject()));
+			}
+			return 1;
+		}
+
+		case T_INT: // fall through
+		case T_FLOAT: {
+			int value = c->as_jint_bits();
+			switch (value) {
+				case -1: scope_values->append(&_int_m1_scope_value); break;
+				case 0:  scope_values->append(&_int_0_scope_value); break;
+				case 1:  scope_values->append(&_int_1_scope_value); break;
+				case 2:  scope_values->append(&_int_2_scope_value); break;
+				default: scope_values->append(new ConstantIntValue(c->as_jint_bits())); break;
+			}
+			return 1;
+		}
+
+		case T_LONG: // fall through
+		case T_DOUBLE: {
+			if (hi_word_offset_in_bytes > lo_word_offset_in_bytes) {
+				scope_values->append(new ConstantIntValue(c->as_jint_hi_bits()));
+				scope_values->append(new ConstantIntValue(c->as_jint_lo_bits()));
+			} else {
+				scope_values->append(new ConstantIntValue(c->as_jint_lo_bits()));
+				scope_values->append(new ConstantIntValue(c->as_jint_hi_bits()));
+			}
+
+			return 2;
+		}
+
+		default:
+			ShouldNotReachHere();
+			return -1;
+	}
 }
 
 int LinearScan::append_scope_value_for_operand(LIR_Opr opr, GrowableArray<ScopeValue*>* scope_values) {
-  if (opr->is_single_stack()) {
-    int stack_idx = opr->single_stack_ix();
-    bool is_oop = opr->is_oop_register();
-    int cache_idx = (stack_idx + LinearScan::nof_cpu_regs) * 2 + (is_oop ? 1 : 0);
-
-    ScopeValue* sv = _scope_value_cache.at(cache_idx);
-    if (sv == NULL) {
-      Location::Type loc_type = is_oop ? Location::oop : Location::normal;
-      sv = location_for_name(stack_idx, loc_type);
-      _scope_value_cache.at_put(cache_idx, sv);
-    }
-
-    // check if cached value is correct
-    DEBUG_ONLY(assert_equal(sv, location_for_name(stack_idx, is_oop ? Location::oop : Location::normal)));
-
-    scope_values->append(sv);
-    return 1;
-
-  } else if (opr->is_single_cpu()) {
-    bool is_oop = opr->is_oop_register();
-    int cache_idx = opr->cpu_regnr() * 2 + (is_oop ? 1 : 0);
-
-    ScopeValue* sv = _scope_value_cache.at(cache_idx);
-    if (sv == NULL) {
-      Location::Type loc_type = is_oop ? Location::oop : Location::normal;
-      VMReg rname = frame_map()->regname(opr);
-      sv = new LocationValue(Location::new_reg_loc(loc_type, rname));
-      _scope_value_cache.at_put(cache_idx, sv);
-    }
-
-    // check if cached value is correct
-    DEBUG_ONLY(assert_equal(sv, new LocationValue(Location::new_reg_loc(is_oop ? Location::oop : Location::normal, frame_map()->regname(opr)))));
-
-    scope_values->append(sv);
-    return 1;
+	if (opr->is_single_stack()) {
+		int stack_idx = opr->single_stack_ix();
+		bool is_oop = opr->is_oop_register();
+		int cache_idx = (stack_idx + LinearScan::nof_cpu_regs) * 2 + (is_oop ? 1 : 0);
+
+		ScopeValue* sv = _scope_value_cache.at(cache_idx);
+		if (sv == NULL) {
+			Location::Type loc_type = is_oop ? Location::oop : Location::normal;
+			sv = location_for_name(stack_idx, loc_type);
+			_scope_value_cache.at_put(cache_idx, sv);
+		}
+
+		// check if cached value is correct
+		DEBUG_ONLY(assert_equal(sv, location_for_name(stack_idx, is_oop ? Location::oop : Location::normal)));
+
+		scope_values->append(sv);
+		return 1;
+
+	} else if (opr->is_single_cpu()) {
+		bool is_oop = opr->is_oop_register();
+		int cache_idx = opr->cpu_regnr() * 2 + (is_oop ? 1 : 0);
+
+		ScopeValue* sv = _scope_value_cache.at(cache_idx);
+		if (sv == NULL) {
+			Location::Type loc_type = is_oop ? Location::oop : Location::normal;
+			VMReg rname = frame_map()->regname(opr);
+			sv = new LocationValue(Location::new_reg_loc(loc_type, rname));
+			_scope_value_cache.at_put(cache_idx, sv);
+		}
+
+		// check if cached value is correct
+		DEBUG_ONLY(assert_equal(sv, new LocationValue(Location::new_reg_loc(is_oop ? Location::oop : Location::normal, frame_map()->regname(opr)))));
+
+		scope_values->append(sv);
+		return 1;
 
 #ifdef X86
-  } else if (opr->is_single_xmm()) {
-    VMReg rname = opr->as_xmm_float_reg()->as_VMReg();
-    LocationValue* sv = new LocationValue(Location::new_reg_loc(Location::normal, rname));
-
-    scope_values->append(sv);
-    return 1;
+	} else if (opr->is_single_xmm()) {
+		VMReg rname = opr->as_xmm_float_reg()->as_VMReg();
+		LocationValue* sv = new LocationValue(Location::new_reg_loc(Location::normal, rname));
+
+		scope_values->append(sv);
+		return 1;
 #endif
 
-  } else if (opr->is_single_fpu()) {
+	} else if (opr->is_single_fpu()) {
 #ifdef X86
-    // the exact location of fpu stack values is only known
-    // during fpu stack allocation, so the stack allocator object
-    // must be present
-    assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)");
-    assert(_fpu_stack_allocator != NULL, "must be present");
-    opr = _fpu_stack_allocator->to_fpu_stack(opr);
+		// the exact location of fpu stack values is only known
+		// during fpu stack allocation, so the stack allocator object
+		// must be present
+		assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)");
+		assert(_fpu_stack_allocator != NULL, "must be present");
+		opr = _fpu_stack_allocator->to_fpu_stack(opr);
 #endif
 
-    Location::Type loc_type = float_saved_as_double ? Location::float_in_dbl : Location::normal;
-    VMReg rname = frame_map()->fpu_regname(opr->fpu_regnr());
-    LocationValue* sv = new LocationValue(Location::new_reg_loc(loc_type, rname));
-
-    scope_values->append(sv);
-    return 1;
-
-  } else {
-    // double-size operands
-
-    ScopeValue* first;
-    ScopeValue* second;
-
-    if (opr->is_double_stack()) {
+		Location::Type loc_type = float_saved_as_double ? Location::float_in_dbl : Location::normal;
+		VMReg rname = frame_map()->fpu_regname(opr->fpu_regnr());
+		LocationValue* sv = new LocationValue(Location::new_reg_loc(loc_type, rname));
+
+		scope_values->append(sv);
+		return 1;
+
+	} else {
+		// double-size operands
+
+		ScopeValue* first;
+		ScopeValue* second;
+
+		if (opr->is_double_stack()) {
 #ifdef _LP64
-      Location loc1;
-      Location::Type loc_type = opr->type() == T_LONG ? Location::lng : Location::dbl;
-      if (!frame_map()->locations_for_slot(opr->double_stack_ix(), loc_type, &loc1, NULL)) {
-        bailout("too large frame");
-      }
-      // Does this reverse on x86 vs. sparc?
-      first =  new LocationValue(loc1);
-      second = &_int_0_scope_value;
+			Location loc1;
+			Location::Type loc_type = opr->type() == T_LONG ? Location::lng : Location::dbl;
+			if (!frame_map()->locations_for_slot(opr->double_stack_ix(), loc_type, &loc1, NULL)) {
+				bailout("too large frame");
+			}
+			// Does this reverse on x86 vs. sparc?
+			first =  new LocationValue(loc1);
+			second = &_int_0_scope_value;
 #else
-      Location loc1, loc2;
-      if (!frame_map()->locations_for_slot(opr->double_stack_ix(), Location::normal, &loc1, &loc2)) {
-        bailout("too large frame");
-      }
-      first =  new LocationValue(loc1);
-      second = new LocationValue(loc2);
+			Location loc1, loc2;
+			if (!frame_map()->locations_for_slot(opr->double_stack_ix(), Location::normal, &loc1, &loc2)) {
+				bailout("too large frame");
+			}
+			first =  new LocationValue(loc1);
+			second = new LocationValue(loc2);
 #endif // _LP64
 
-    } else if (opr->is_double_cpu()) {
+		} else if (opr->is_double_cpu()) {
 #ifdef _LP64
-      VMReg rname_first = opr->as_register_lo()->as_VMReg();
-      first = new LocationValue(Location::new_reg_loc(Location::lng, rname_first));
-      second = &_int_0_scope_value;
+			VMReg rname_first = opr->as_register_lo()->as_VMReg();
+			first = new LocationValue(Location::new_reg_loc(Location::lng, rname_first));
+			second = &_int_0_scope_value;
 #else
-      VMReg rname_first = opr->as_register_lo()->as_VMReg();
-      VMReg rname_second = opr->as_register_hi()->as_VMReg();
-
-      if (hi_word_offset_in_bytes < lo_word_offset_in_bytes) {
-        // lo/hi and swapped relative to first and second, so swap them
-        VMReg tmp = rname_first;
-        rname_first = rname_second;
-        rname_second = tmp;
-      }
-
-      first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
-      second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
+			VMReg rname_first = opr->as_register_lo()->as_VMReg();
+			VMReg rname_second = opr->as_register_hi()->as_VMReg();
+
+			if (hi_word_offset_in_bytes < lo_word_offset_in_bytes) {
+				// lo/hi are swapped relative to first and second, so swap them
+				VMReg tmp = rname_first;
+				rname_first = rname_second;
+				rname_second = tmp;
+			}
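+			// (On a little-endian target the hi word sits at the larger offset, so
+			// the condition above is false and no swap takes place.)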
+
+			first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
+			second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
 #endif //_LP64
 
 
 #ifdef X86
-    } else if (opr->is_double_xmm()) {
-      assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation");
-      VMReg rname_first  = opr->as_xmm_double_reg()->as_VMReg();
-      first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
-      // %%% This is probably a waste but we'll keep things as they were for now
-      if (true) {
-        VMReg rname_second = rname_first->next();
-        second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
-      }
+		} else if (opr->is_double_xmm()) {
+			assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation");
+			VMReg rname_first  = opr->as_xmm_double_reg()->as_VMReg();
+			first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
+			// %%% This is probably a waste but we'll keep things as they were for now
+			if (true) {
+				VMReg rname_second = rname_first->next();
+				second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
+			}
 #endif
 
-    } else if (opr->is_double_fpu()) {
-      // On SPARC, fpu_regnrLo/fpu_regnrHi represents the two halves of
-      // the double as float registers in the native ordering. On X86,
-      // fpu_regnrLo is a FPU stack slot whose VMReg represents
-      // the low-order word of the double and fpu_regnrLo + 1 is the
-      // name for the other half.  *first and *second must represent the
-      // least and most significant words, respectively.
+		} else if (opr->is_double_fpu()) {
+			// On SPARC, fpu_regnrLo/fpu_regnrHi represents the two halves of
+			// the double as float registers in the native ordering. On X86,
+			// fpu_regnrLo is a FPU stack slot whose VMReg represents
+			// the low-order word of the double and fpu_regnrLo + 1 is the
+			// name for the other half.  *first and *second must represent the
+			// least and most significant words, respectively.
 
 #ifdef X86
-      // the exact location of fpu stack values is only known
-      // during fpu stack allocation, so the stack allocator object
-      // must be present
-      assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)");
-      assert(_fpu_stack_allocator != NULL, "must be present");
-      opr = _fpu_stack_allocator->to_fpu_stack(opr);
-
-      assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation (only fpu_regnrHi is used)");
+			// the exact location of fpu stack values is only known
+			// during fpu stack allocation, so the stack allocator object
+			// must be present
+			assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)");
+			assert(_fpu_stack_allocator != NULL, "must be present");
+			opr = _fpu_stack_allocator->to_fpu_stack(opr);
+
+			assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation (only fpu_regnrHi is used)");
 #endif
 #ifdef SPARC
-      assert(opr->fpu_regnrLo() == opr->fpu_regnrHi() + 1, "assumed in calculation (only fpu_regnrHi is used)");
+			assert(opr->fpu_regnrLo() == opr->fpu_regnrHi() + 1, "assumed in calculation (only fpu_regnrHi is used)");
 #endif
 
-      VMReg rname_first = frame_map()->fpu_regname(opr->fpu_regnrHi());
-
-      first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
-      // %%% This is probably a waste but we'll keep things as they were for now
-      if (true) {
-        VMReg rname_second = rname_first->next();
-        second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
-      }
-
-    } else {
-      ShouldNotReachHere();
-      first = NULL;
-      second = NULL;
-    }
-
-    assert(first != NULL && second != NULL, "must be set");
-    // The convention the interpreter uses is that the second local
-    // holds the first raw word of the native double representation.
-    // This is actually reasonable, since locals and stack arrays
-    // grow downwards in all implementations.
-    // (If, on some machine, the interpreter's Java locals or stack
-    // were to grow upwards, the embedded doubles would be word-swapped.)
-    scope_values->append(second);
-    scope_values->append(first);
-    return 2;
-  }
+			VMReg rname_first = frame_map()->fpu_regname(opr->fpu_regnrHi());
+
+			first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
+			// %%% This is probably a waste but we'll keep things as they were for now
+			if (true) {
+				VMReg rname_second = rname_first->next();
+				second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second));
+			}
+
+		} else {
+			ShouldNotReachHere();
+			first = NULL;
+			second = NULL;
+		}
+
+		assert(first != NULL && second != NULL, "must be set");
+		// The convention the interpreter uses is that the second local
+		// holds the first raw word of the native double representation.
+		// This is actually reasonable, since locals and stack arrays
+		// grow downwards in all implementations.
+		// (If, on some machine, the interpreter's Java locals or stack
+		// were to grow upwards, the embedded doubles would be word-swapped.)
+		scope_values->append(second);
+		scope_values->append(first);
+		return 2;
+	}
 }
 
 
 int LinearScan::append_scope_value(int op_id, Value value, GrowableArray<ScopeValue*>* scope_values) {
-  if (value != NULL) {
-    LIR_Opr opr = value->operand();
-    Constant* con = value->as_Constant();
-
-    assert(con == NULL || opr->is_virtual() || opr->is_constant() || opr->is_illegal(), "asumption: Constant instructions have only constant operands (or illegal if constant is optimized away)");
-    assert(con != NULL || opr->is_virtual(), "asumption: non-Constant instructions have only virtual operands");
-
-    if (con != NULL && !con->is_pinned() && !opr->is_constant()) {
-      // Unpinned constants may have a virtual operand for a part of the lifetime
-      // or may be illegal when it was optimized away,
-      // so always use a constant operand
-      opr = LIR_OprFact::value_type(con->type());
-    }
-    assert(opr->is_virtual() || opr->is_constant(), "other cases not allowed here");
-
-    if (opr->is_virtual()) {
-      LIR_OpVisitState::OprMode mode = LIR_OpVisitState::inputMode;
-
-      BlockBegin* block = block_of_op_with_id(op_id);
-      if (block->number_of_sux() == 1 && op_id == block->last_lir_instruction_id()) {
-        // generating debug information for the last instruction of a block.
-        // if this instruction is a branch, spill moves are inserted before this branch
-        // and so the wrong operand would be returned (spill moves at block boundaries are not
-        // considered in the live ranges of intervals)
-        // Solution: use the first op_id of the branch target block instead.
-        if (block->lir()->instructions_list()->last()->as_OpBranch() != NULL) {
-          if (block->live_out().at(opr->vreg_number())) {
-            op_id = block->sux_at(0)->first_lir_instruction_id();
-            mode = LIR_OpVisitState::outputMode;
-          }
-        }
-      }
-
-      // Get current location of operand
-      // The operand must be live because debug information is considered when building the intervals
-      // if the interval is not live, color_lir_opr will cause an assertion failure
-      opr = color_lir_opr(opr, op_id, mode);
-      assert(!has_call(op_id) || opr->is_stack() || !is_caller_save(reg_num(opr)), "can not have caller-save register operands at calls");
-
-      // Append to ScopeValue array
-      return append_scope_value_for_operand(opr, scope_values);
-
-    } else {
-      assert(value->as_Constant() != NULL, "all other instructions have only virtual operands");
-      assert(opr->is_constant(), "operand must be constant");
-
-      return append_scope_value_for_constant(opr, scope_values);
-    }
-  } else {
-    // append a dummy value because real value not needed
-    scope_values->append(&_illegal_value);
-    return 1;
-  }
+	if (value != NULL) {
+		LIR_Opr opr = value->operand();
+		Constant* con = value->as_Constant();
+
+		assert(con == NULL || opr->is_virtual() || opr->is_constant() || opr->is_illegal(), "assumption: Constant instructions have only constant operands (or illegal if constant is optimized away)");
+		assert(con != NULL || opr->is_virtual(), "assumption: non-Constant instructions have only virtual operands");
+
+		if (con != NULL && !con->is_pinned() && !opr->is_constant()) {
+			// Unpinned constants may have a virtual operand for a part of the lifetime
+			// or may be illegal when it was optimized away,
+			// so always use a constant operand
+			opr = LIR_OprFact::value_type(con->type());
+		}
+		assert(opr->is_virtual() || opr->is_constant(), "other cases not allowed here");
+
+		if (opr->is_virtual()) {
+			LIR_OpVisitState::OprMode mode = LIR_OpVisitState::inputMode;
+
+			BlockBegin* block = block_of_op_with_id(op_id);
+			if (block->number_of_sux() == 1 && op_id == block->last_lir_instruction_id()) {
+				// generating debug information for the last instruction of a block.
+				// if this instruction is a branch, spill moves are inserted before this branch
+				// and so the wrong operand would be returned (spill moves at block boundaries are not
+				// considered in the live ranges of intervals)
+				// Solution: use the first op_id of the branch target block instead.
+				if (block->lir()->instructions_list()->last()->as_OpBranch() != NULL) {
+					if (block->live_out().at(opr->vreg_number())) {
+						op_id = block->sux_at(0)->first_lir_instruction_id();
+						mode = LIR_OpVisitState::outputMode;
+					}
+				}
+			}
+
+			// Get current location of operand
+			// The operand must be live because debug information is considered when building the intervals
+			// if the interval is not live, color_lir_opr will cause an assertion failure
+			opr = color_lir_opr(opr, op_id, mode);
+			assert(!has_call(op_id) || opr->is_stack() || !is_caller_save(reg_num(opr)), "can not have caller-save register operands at calls");
+
+			// Append to ScopeValue array
+			return append_scope_value_for_operand(opr, scope_values);
+
+		} else {
+			assert(value->as_Constant() != NULL, "all other instructions have only virtual operands");
+			assert(opr->is_constant(), "operand must be constant");
+
+			return append_scope_value_for_constant(opr, scope_values);
+		}
+	} else {
+		// append a dummy value because real value not needed
+		scope_values->append(&_illegal_value);
+		return 1;
+	}
 }
 
 
 IRScopeDebugInfo* LinearScan::compute_debug_info_for_scope(int op_id, IRScope* cur_scope, ValueStack* cur_state, ValueStack* innermost_state, int cur_bci, int stack_end, int locks_end) {
-  IRScopeDebugInfo* caller_debug_info = NULL;
-  int stack_begin, locks_begin;
-
-  ValueStack* caller_state = cur_scope->caller_state();
-  if (caller_state != NULL) {
-    // process recursively to compute outermost scope first
-    stack_begin = caller_state->stack_size();
-    locks_begin = caller_state->locks_size();
-    caller_debug_info = compute_debug_info_for_scope(op_id, cur_scope->caller(), caller_state, innermost_state, cur_scope->caller_bci(), stack_begin, locks_begin);
-  } else {
-    stack_begin = 0;
-    locks_begin = 0;
-  }
-
-  // initialize these to null.
-  // If we don't need deopt info or there are no locals, expressions or monitors,
-  // then these get recorded as no information and avoids the allocation of 0 length arrays.
-  GrowableArray<ScopeValue*>*   locals      = NULL;
-  GrowableArray<ScopeValue*>*   expressions = NULL;
-  GrowableArray<MonitorValue*>* monitors    = NULL;
-
-  // describe local variable values
-  int nof_locals = cur_scope->method()->max_locals();
-  if (nof_locals > 0) {
-    locals = new GrowableArray<ScopeValue*>(nof_locals);
-
-    int pos = 0;
-    while (pos < nof_locals) {
-      assert(pos < cur_state->locals_size(), "why not?");
-
-      Value local = cur_state->local_at(pos);
-      pos += append_scope_value(op_id, local, locals);
-
-      assert(locals->length() == pos, "must match");
-    }
-    assert(locals->length() == cur_scope->method()->max_locals(), "wrong number of locals");
-    assert(locals->length() == cur_state->locals_size(), "wrong number of locals");
-  }
-
-
-  // describe expression stack
-  //
-  // When we inline methods containing exception handlers, the
-  // "lock_stacks" are changed to preserve expression stack values
-  // in caller scopes when exception handlers are present. This
-  // can cause callee stacks to be smaller than caller stacks.
-  if (stack_end > innermost_state->stack_size()) {
-    stack_end = innermost_state->stack_size();
-  }
-
-
-
-  int nof_stack = stack_end - stack_begin;
-  if (nof_stack > 0) {
-    expressions = new GrowableArray<ScopeValue*>(nof_stack);
-
-    int pos = stack_begin;
-    while (pos < stack_end) {
-      Value expression = innermost_state->stack_at_inc(pos);
-      append_scope_value(op_id, expression, expressions);
-
-      assert(expressions->length() + stack_begin == pos, "must match");
-    }
-  }
-
-  // describe monitors
-  assert(locks_begin <= locks_end, "error in scope iteration");
-  int nof_locks = locks_end - locks_begin;
-  if (nof_locks > 0) {
-    monitors = new GrowableArray<MonitorValue*>(nof_locks);
-    for (int i = locks_begin; i < locks_end; i++) {
-      monitors->append(location_for_monitor_index(i));
-    }
-  }
-
-  return new IRScopeDebugInfo(cur_scope, cur_bci, locals, expressions, monitors, caller_debug_info);
+	IRScopeDebugInfo* caller_debug_info = NULL;
+	int stack_begin, locks_begin;
+
+	ValueStack* caller_state = cur_scope->caller_state();
+	if (caller_state != NULL) {
+		// process recursively to compute outermost scope first
+		stack_begin = caller_state->stack_size();
+		locks_begin = caller_state->locks_size();
+		caller_debug_info = compute_debug_info_for_scope(op_id, cur_scope->caller(), caller_state, innermost_state, cur_scope->caller_bci(), stack_begin, locks_begin);
+	} else {
+		stack_begin = 0;
+		locks_begin = 0;
+	}
+
+	// Initialize these to null.
+	// If we don't need deopt info, or there are no locals, expressions or monitors,
+	// they get recorded as "no information", which avoids allocating 0-length arrays.
+	GrowableArray<ScopeValue*>*   locals      = NULL;
+	GrowableArray<ScopeValue*>*   expressions = NULL;
+	GrowableArray<MonitorValue*>* monitors    = NULL;
+
+	// describe local variable values
+	int nof_locals = cur_scope->method()->max_locals();
+	if (nof_locals > 0) {
+		locals = new GrowableArray<ScopeValue*>(nof_locals);
+
+		int pos = 0;
+		while (pos < nof_locals) {
+			assert(pos < cur_state->locals_size(), "why not?");
+
+			Value local = cur_state->local_at(pos);
+			pos += append_scope_value(op_id, local, locals);
+
+			assert(locals->length() == pos, "must match");
+		}
+		assert(locals->length() == cur_scope->method()->max_locals(), "wrong number of locals");
+		assert(locals->length() == cur_state->locals_size(), "wrong number of locals");
+	}
+
+
+	// describe expression stack
+	//
+	// When we inline methods containing exception handlers, the
+	// "lock_stacks" are changed to preserve expression stack values
+	// in caller scopes when exception handlers are present. This
+	// can cause callee stacks to be smaller than caller stacks.
+	if (stack_end > innermost_state->stack_size()) {
+		stack_end = innermost_state->stack_size();
+	}
+
+
+
+	int nof_stack = stack_end - stack_begin;
+	if (nof_stack > 0) {
+		expressions = new GrowableArray<ScopeValue*>(nof_stack);
+
+		int pos = stack_begin;
+		while (pos < stack_end) {
+			Value expression = innermost_state->stack_at_inc(pos);
+			append_scope_value(op_id, expression, expressions);
+
+			assert(expressions->length() + stack_begin == pos, "must match");
+		}
+	}
+
+	// describe monitors
+	assert(locks_begin <= locks_end, "error in scope iteration");
+	int nof_locks = locks_end - locks_begin;
+	if (nof_locks > 0) {
+		monitors = new GrowableArray<MonitorValue*>(nof_locks);
+		for (int i = locks_begin; i < locks_end; i++) {
+			monitors->append(location_for_monitor_index(i));
+		}
+	}
+
+	return new IRScopeDebugInfo(cur_scope, cur_bci, locals, expressions, monitors, caller_debug_info);
 }
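
Note the recursion order above: the caller scope is described first, and each callee's slice of the flat expression/lock stacks begins where its caller's state ends (stack_begin/locks_begin). Below is a minimal sketch of that slicing idea, using hypothetical simplified stand-ins rather than the real IRScope/ValueStack/IRScopeDebugInfo classes.

#include <cstdio>
#include <memory>

// Hypothetical, simplified stand-ins -- not the real C1 classes.
struct Scope {
  const char* name;
  int         caller_stack_size;   // expression-stack slots owned by callers
  Scope*      caller;              // NULL for the outermost scope
};

struct ScopeInfo {
  const Scope*               scope;
  int                        stack_begin;   // first stack slot of this scope
  std::unique_ptr<ScopeInfo> caller_info;   // computed before this one
};

// Recurse to the outermost caller first, then describe the current scope,
// mirroring the recursion order of compute_debug_info_for_scope.
static std::unique_ptr<ScopeInfo> describe(const Scope* s) {
  std::unique_ptr<ScopeInfo> caller_info;
  int stack_begin = 0;
  if (s->caller != NULL) {
    stack_begin = s->caller_stack_size;    // callee slots follow caller slots
    caller_info = describe(s->caller);
  }
  return std::unique_ptr<ScopeInfo>(
      new ScopeInfo{s, stack_begin, std::move(caller_info)});
}

int main() {
  Scope outer = {"outer", 0, NULL};
  Scope inner = {"inner", 2, &outer};      // caller keeps 2 stack slots live
  std::unique_ptr<ScopeInfo> info = describe(&inner);
  std::printf("%s starts at stack slot %d\n", info->scope->name, info->stack_begin);
  return 0;
}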
 
 
 void LinearScan::compute_debug_info(CodeEmitInfo* info, int op_id) {
-  if (!compilation()->needs_debug_information()) {
-    return;
-  }
-  TRACE_LINEAR_SCAN(3, tty->print_cr("creating debug information at op_id %d", op_id));
-
-  IRScope* innermost_scope = info->scope();
-  ValueStack* innermost_state = info->stack();
-
-  assert(innermost_scope != NULL && innermost_state != NULL, "why is it missing?");
-
-  int stack_end = innermost_state->stack_size();
-  int locks_end = innermost_state->locks_size();
-
-  DEBUG_ONLY(check_stack_depth(info, stack_end));
-
-  if (info->_scope_debug_info == NULL) {
-    // compute debug information
-    info->_scope_debug_info = compute_debug_info_for_scope(op_id, innermost_scope, innermost_state, innermost_state, info->bci(), stack_end, locks_end);
-  } else {
-    // debug information already set. Check that it is correct from the current point of view
-    DEBUG_ONLY(assert_equal(info->_scope_debug_info, compute_debug_info_for_scope(op_id, innermost_scope, innermost_state, innermost_state, info->bci(), stack_end, locks_end)));
-  }
+	if (!compilation()->needs_debug_information()) {
+		return;
+	}
+	TRACE_LINEAR_SCAN(3, tty->print_cr("creating debug information at op_id %d", op_id));
+
+	IRScope* innermost_scope = info->scope();
+	ValueStack* innermost_state = info->stack();
+
+	assert(innermost_scope != NULL && innermost_state != NULL, "why is it missing?");
+
+	int stack_end = innermost_state->stack_size();
+	int locks_end = innermost_state->locks_size();
+
+	DEBUG_ONLY(check_stack_depth(info, stack_end));
+
+	if (info->_scope_debug_info == NULL) {
+		// compute debug information
+		info->_scope_debug_info = compute_debug_info_for_scope(op_id, innermost_scope, innermost_state, innermost_state, info->bci(), stack_end, locks_end);
+	} else {
+		// debug information already set. Check that it is correct from the current point of view
+		DEBUG_ONLY(assert_equal(info->_scope_debug_info, compute_debug_info_for_scope(op_id, innermost_scope, innermost_state, innermost_state, info->bci(), stack_end, locks_end)));
+	}
 }
 
 
 void LinearScan::assign_reg_num(LIR_OpList* instructions, IntervalWalker* iw) {
-  LIR_OpVisitState visitor;
-  int num_inst = instructions->length();
-  bool has_dead = false;
-
-  for (int j = 0; j < num_inst; j++) {
-    LIR_Op* op = instructions->at(j);
-    if (op == NULL) {  // this can happen when spill-moves are removed in eliminate_spill_moves
-      has_dead = true;
-      continue;
-    }
-    int op_id = op->id();
-
-    // visit instruction to get list of operands
-    visitor.visit(op);
-
-    // iterate all modes of the visitor and process all virtual operands
-    for_each_visitor_mode(mode) {
-      int n = visitor.opr_count(mode);
-      for (int k = 0; k < n; k++) {
-        LIR_Opr opr = visitor.opr_at(mode, k);
-        if (opr->is_virtual_register()) {
-          visitor.set_opr_at(mode, k, color_lir_opr(opr, op_id, mode));
-        }
-      }
-    }
-
-    if (visitor.info_count() > 0) {
-      // exception handling
-      if (compilation()->has_exception_handlers()) {
-        XHandlers* xhandlers = visitor.all_xhandler();
-        int n = xhandlers->length();
-        for (int k = 0; k < n; k++) {
-          XHandler* handler = xhandlers->handler_at(k);
-          if (handler->entry_code() != NULL) {
-            assign_reg_num(handler->entry_code()->instructions_list(), NULL);
-          }
-        }
-      } else {
-        assert(visitor.all_xhandler()->length() == 0, "missed exception handler");
-      }
-
-      // compute oop map
-      assert(iw != NULL, "needed for compute_oop_map");
-      compute_oop_map(iw, visitor, op);
-
-      // compute debug information
-      if (!use_fpu_stack_allocation()) {
-        // compute debug information if fpu stack allocation is not needed.
-        // when fpu stack allocation is needed, the debug information can not
-        // be computed here because the exact location of fpu operands is not known
-        // -> debug information is created inside the fpu stack allocator
-        int n = visitor.info_count();
-        for (int k = 0; k < n; k++) {
-          compute_debug_info(visitor.info_at(k), op_id);
-        }
-      }
-    }
+	LIR_OpVisitState visitor;
+	int num_inst = instructions->length();
+	bool has_dead = false;
+
+	for (int j = 0; j < num_inst; j++) {
+		LIR_Op* op = instructions->at(j);
+		if (op == NULL) {  // this can happen when spill-moves are removed in eliminate_spill_moves
+			has_dead = true;
+			continue;
+		}
+		int op_id = op->id();
+
+		// visit instruction to get list of operands
+		visitor.visit(op);
+
+		// iterate all modes of the visitor and process all virtual operands
+		for_each_visitor_mode(mode) {
+			int n = visitor.opr_count(mode);
+			for (int k = 0; k < n; k++) {
+				LIR_Opr opr = visitor.opr_at(mode, k);
+				if (opr->is_virtual_register()) {
+					visitor.set_opr_at(mode, k, color_lir_opr(opr, op_id, mode));
+				}
+			}
+		}
+
+		if (visitor.info_count() > 0) {
+			// exception handling
+			if (compilation()->has_exception_handlers()) {
+				XHandlers* xhandlers = visitor.all_xhandler();
+				int n = xhandlers->length();
+				for (int k = 0; k < n; k++) {
+					XHandler* handler = xhandlers->handler_at(k);
+					if (handler->entry_code() != NULL) {
+						assign_reg_num(handler->entry_code()->instructions_list(), NULL);
+					}
+				}
+			} else {
+				assert(visitor.all_xhandler()->length() == 0, "missed exception handler");
+			}
+
+			// compute oop map
+			assert(iw != NULL, "needed for compute_oop_map");
+			compute_oop_map(iw, visitor, op);
+
+			// compute debug information
+			if (!use_fpu_stack_allocation()) {
+				// compute debug information if fpu stack allocation is not needed.
+				// when fpu stack allocation is needed, the debug information can not
+				// be computed here because the exact location of fpu operands is not known
+				// -> debug information is created inside the fpu stack allocator
+				int n = visitor.info_count();
+				for (int k = 0; k < n; k++) {
+					compute_debug_info(visitor.info_at(k), op_id);
+				}
+			}
+		}
 
 #ifdef ASSERT
-    // make sure we haven't made the op invalid.
-    op->verify();
+		// make sure we haven't made the op invalid.
+		op->verify();
 #endif
 
-    // remove useless moves
-    if (op->code() == lir_move) {
-      assert(op->as_Op1() != NULL, "move must be LIR_Op1");
-      LIR_Op1* move = (LIR_Op1*)op;
-      LIR_Opr src = move->in_opr();
-      LIR_Opr dst = move->result_opr();
-      if (dst == src ||
-          !dst->is_pointer() && !src->is_pointer() &&
-          src->is_same_register(dst)) {
-        instructions->at_put(j, NULL);
-        has_dead = true;
-      }
-    }
-  }
-
-  if (has_dead) {
-    // iterate all instructions of the block and remove all null-values.
-    int insert_point = 0;
-    for (int j = 0; j < num_inst; j++) {
-      LIR_Op* op = instructions->at(j);
-      if (op != NULL) {
-        if (insert_point != j) {
-          instructions->at_put(insert_point, op);
-        }
-        insert_point++;
-      }
-    }
-    instructions->truncate(insert_point);
-  }
+		// remove useless moves
+		if (op->code() == lir_move) {
+			assert(op->as_Op1() != NULL, "move must be LIR_Op1");
+			LIR_Op1* move = (LIR_Op1*)op;
+			LIR_Opr src = move->in_opr();
+			LIR_Opr dst = move->result_opr();
+			if (dst == src ||
+					!dst->is_pointer() && !src->is_pointer() &&
+					src->is_same_register(dst)) {
+				instructions->at_put(j, NULL);
+				has_dead = true;
+			}
+		}
+	}
+
+	if (has_dead) {
+		// iterate all instructions of the block and remove all null-values.
+		int insert_point = 0;
+		for (int j = 0; j < num_inst; j++) {
+			LIR_Op* op = instructions->at(j);
+			if (op != NULL) {
+				if (insert_point != j) {
+					instructions->at_put(insert_point, op);
+				}
+				insert_point++;
+			}
+		}
+		instructions->truncate(insert_point);
+	}
 }
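
In the loop above, removed moves are only nulled out in place; the trailing pass then compacts the instruction list so no gaps remain. The same compaction idiom on a plain std::vector, as a sketch rather than the GrowableArray/LIR_OpList API:

#include <cstdio>
#include <vector>

// Compact a list in place, preserving order: keep the live entries and
// truncate the rest -- the same idea as the insert_point loop above.
static void compact(std::vector<int>& ops) {
  size_t insert_point = 0;
  for (size_t j = 0; j < ops.size(); j++) {
    if (ops[j] != 0) {                // 0 plays the role of a removed (NULL) op
      if (insert_point != j) {
        ops[insert_point] = ops[j];
      }
      insert_point++;
    }
  }
  ops.resize(insert_point);           // analogous to instructions->truncate()
}

int main() {
  std::vector<int> ops = {1, 0, 2, 0, 0, 3};
  compact(ops);
  for (size_t i = 0; i < ops.size(); i++) {
    std::printf("%d ", ops[i]);       // prints: 1 2 3
  }
  std::printf("\n");
  return 0;
}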
 
 void LinearScan::assign_reg_num() {
-  TIME_LINEAR_SCAN(timer_assign_reg_num);
-
-  init_compute_debug_info();
-  IntervalWalker* iw = init_compute_oop_maps();
-
-  int num_blocks = block_count();
-  for (int i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-    assign_reg_num(block->lir()->instructions_list(), iw);
-  }
+	TIME_LINEAR_SCAN(timer_assign_reg_num);
+
+	init_compute_debug_info();
+	IntervalWalker* iw = init_compute_oop_maps();
+
+	int num_blocks = block_count();
+	for (int i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+		assign_reg_num(block->lir()->instructions_list(), iw);
+	}
 }
 
 
 void LinearScan::do_linear_scan() {
-  NOT_PRODUCT(_total_timer.begin_method());
-
-  number_instructions();
-
-  NOT_PRODUCT(print_lir(1, "Before Register Allocation"));
-
-  compute_local_live_sets();
-  compute_global_live_sets();
-  CHECK_BAILOUT();
-
-  build_intervals();
-  CHECK_BAILOUT();
-  sort_intervals_before_allocation();
-
-  NOT_PRODUCT(print_intervals("Before Register Allocation"));
-  NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_before_alloc));
-
-  allocate_registers();
-  CHECK_BAILOUT();
-
-  resolve_data_flow();
-  if (compilation()->has_exception_handlers()) {
-    resolve_exception_handlers();
-  }
-  // fill in number of spill slots into frame_map
-  propagate_spill_slots();
-  CHECK_BAILOUT();
-
-  NOT_PRODUCT(print_intervals("After Register Allocation"));
-  NOT_PRODUCT(print_lir(2, "LIR after register allocation:"));
-  DEBUG_ONLY(verify());
-
-  sort_intervals_after_allocation();
-  eliminate_spill_moves();
-  assign_reg_num();
-  CHECK_BAILOUT();
-
-  NOT_PRODUCT(print_lir(2, "LIR after assignment of register numbers:"));
-  NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_after_asign));
-
-  { TIME_LINEAR_SCAN(timer_allocate_fpu_stack);
-
-    if (use_fpu_stack_allocation()) {
-      allocate_fpu_stack(); // Only has effect on Intel
-      NOT_PRODUCT(print_lir(2, "LIR after FPU stack allocation:"));
-    }
-  }
-
-  { TIME_LINEAR_SCAN(timer_optimize_lir);
-
-    EdgeMoveOptimizer::optimize(ir()->code());
-    ControlFlowOptimizer::optimize(ir()->code());
-    // check that cfg is still correct after optimizations
-    ir()->verify();
-  }
-
-  NOT_PRODUCT(print_lir(1, "Before Code Generation", false));
-  NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final));
-  NOT_PRODUCT(_total_timer.end_method(this));
+	NOT_PRODUCT(_total_timer.begin_method());
+
+	number_instructions();
+
+	NOT_PRODUCT(print_lir(1, "Before Register Allocation"));
+
+	compute_local_live_sets();
+	compute_global_live_sets();
+	CHECK_BAILOUT();
+
+	build_intervals();
+	CHECK_BAILOUT();
+	sort_intervals_before_allocation();
+
+	NOT_PRODUCT(print_intervals("Before Register Allocation"));
+	NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_before_alloc));
+
+	allocate_registers();
+	CHECK_BAILOUT();
+
+	resolve_data_flow();
+	if (compilation()->has_exception_handlers()) {
+		resolve_exception_handlers();
+	}
+	// fill in number of spill slots into frame_map
+	propagate_spill_slots();
+	CHECK_BAILOUT();
+
+	NOT_PRODUCT(print_intervals("After Register Allocation"));
+	NOT_PRODUCT(print_lir(2, "LIR after register allocation:"));
+	DEBUG_ONLY(verify());
+
+	sort_intervals_after_allocation();
+	eliminate_spill_moves();
+	assign_reg_num();
+	CHECK_BAILOUT();
+
+	NOT_PRODUCT(print_lir(2, "LIR after assignment of register numbers:"));
+	NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_after_asign));
+
+	{ TIME_LINEAR_SCAN(timer_allocate_fpu_stack);
+
+		if (use_fpu_stack_allocation()) {
+			allocate_fpu_stack(); // Only has effect on Intel
+			NOT_PRODUCT(print_lir(2, "LIR after FPU stack allocation:"));
+		}
+	}
+
+	{ TIME_LINEAR_SCAN(timer_optimize_lir);
+
+		EdgeMoveOptimizer::optimize(ir()->code());
+		ControlFlowOptimizer::optimize(ir()->code());
+		// check that cfg is still correct after optimizations
+		ir()->verify();
+	}
+
+	NOT_PRODUCT(print_lir(1, "Before Code Generation", false));
+	NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final));
+	NOT_PRODUCT(_total_timer.end_method(this));
 }
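
do_linear_scan is a straight-line driver: after each phase that may give up it re-checks for a bailout and returns early. A rough sketch of that early-return pattern follows; the macro and the Compilation struct are simplified assumptions, not the HotSpot definitions.

#include <cstdio>

// Hypothetical stand-in for the compilation object.
struct Compilation {
  bool bailed_out;
};

// Early-return style used between allocator phases (sketch of CHECK_BAILOUT).
#define CHECK_BAILOUT_SKETCH(comp) \
  do { if ((comp)->bailed_out) return; } while (0)

static void phase_a(Compilation* c) { (void)c; std::printf("phase_a\n"); }
static void phase_b(Compilation* c) { c->bailed_out = true; }  // gives up
static void phase_c(Compilation* c) { (void)c; std::printf("phase_c\n"); }

static void run(Compilation* c) {
  phase_a(c); CHECK_BAILOUT_SKETCH(c);
  phase_b(c); CHECK_BAILOUT_SKETCH(c);   // stops here
  phase_c(c); CHECK_BAILOUT_SKETCH(c);   // never reached
}

int main() {
  Compilation c = {false};
  run(&c);
  std::printf("bailed out: %d\n", c.bailed_out ? 1 : 0);
  return 0;
}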
 
 
@@ -2993,61 +2998,61 @@
 #ifndef PRODUCT
 
 void LinearScan::print_timers(double total) {
-  _total_timer.print(total);
+	_total_timer.print(total);
 }
 
 void LinearScan::print_statistics() {
-  _stat_before_alloc.print("before allocation");
-  _stat_after_asign.print("after assignment of register");
-  _stat_final.print("after optimization");
+	_stat_before_alloc.print("before allocation");
+	_stat_after_asign.print("after assignment of register");
+	_stat_final.print("after optimization");
 }
 
 void LinearScan::print_bitmap(BitMap& b) {
-  for (unsigned int i = 0; i < b.size(); i++) {
-    if (b.at(i)) tty->print("%d ", i);
-  }
-  tty->cr();
+	for (unsigned int i = 0; i < b.size(); i++) {
+		if (b.at(i)) tty->print("%d ", i);
+	}
+	tty->cr();
 }
 
 void LinearScan::print_intervals(const char* label) {
-  if (TraceLinearScanLevel >= 1) {
-    int i;
-    tty->cr();
-    tty->print_cr("%s", label);
-
-    for (i = 0; i < interval_count(); i++) {
-      Interval* interval = interval_at(i);
-      if (interval != NULL) {
-        interval->print();
-      }
-    }
-
-    tty->cr();
-    tty->print_cr("--- Basic Blocks ---");
-    for (i = 0; i < block_count(); i++) {
-      BlockBegin* block = block_at(i);
-      tty->print("B%d [%d, %d, %d, %d] ", block->block_id(), block->first_lir_instruction_id(), block->last_lir_instruction_id(), block->loop_index(), block->loop_depth());
-    }
-    tty->cr();
-    tty->cr();
-  }
-
-  if (PrintCFGToFile) {
-    CFGPrinter::print_intervals(&_intervals, label);
-  }
+	if (TraceLinearScanLevel >= 1) {
+		int i;
+		tty->cr();
+		tty->print_cr("%s", label);
+
+		for (i = 0; i < interval_count(); i++) {
+			Interval* interval = interval_at(i);
+			if (interval != NULL) {
+				interval->print();
+			}
+		}
+
+		tty->cr();
+		tty->print_cr("--- Basic Blocks ---");
+		for (i = 0; i < block_count(); i++) {
+			BlockBegin* block = block_at(i);
+			tty->print("B%d [%d, %d, %d, %d] ", block->block_id(), block->first_lir_instruction_id(), block->last_lir_instruction_id(), block->loop_index(), block->loop_depth());
+		}
+		tty->cr();
+		tty->cr();
+	}
+
+	if (PrintCFGToFile) {
+		CFGPrinter::print_intervals(&_intervals, label);
+	}
 }
 
 void LinearScan::print_lir(int level, const char* label, bool hir_valid) {
-  if (TraceLinearScanLevel >= level) {
-    tty->cr();
-    tty->print_cr("%s", label);
-    print_LIR(ir()->linear_scan_order());
-    tty->cr();
-  }
-
-  if (level == 1 && PrintCFGToFile) {
-    CFGPrinter::print_cfg(ir()->linear_scan_order(), label, hir_valid, true);
-  }
+	if (TraceLinearScanLevel >= level) {
+		tty->cr();
+		tty->print_cr("%s", label);
+		print_LIR(ir()->linear_scan_order());
+		tty->cr();
+	}
+
+	if (level == 1 && PrintCFGToFile) {
+		CFGPrinter::print_cfg(ir()->linear_scan_order(), label, hir_valid, true);
+	}
 }
 
 #endif //PRODUCT
@@ -3058,444 +3063,444 @@
 #ifdef ASSERT
 
 void LinearScan::verify() {
-  TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying intervals ******************************************"));
-  verify_intervals();
-
-  TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying that no oops are in fixed intervals ****************"));
-  verify_no_oops_in_fixed_intervals();
-
-  TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying that unpinned constants are not alive across block boundaries"));
-  verify_constants();
-
-  TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying register allocation ********************************"));
-  verify_registers();
-
-  TRACE_LINEAR_SCAN(2, tty->print_cr("********* no errors found **********************************************"));
+	TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying intervals ******************************************"));
+	verify_intervals();
+
+	TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying that no oops are in fixed intervals ****************"));
+	verify_no_oops_in_fixed_intervals();
+
+	TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying that unpinned constants are not alive across block boundaries"));
+	verify_constants();
+
+	TRACE_LINEAR_SCAN(2, tty->print_cr("********* verifying register allocation ********************************"));
+	verify_registers();
+
+	TRACE_LINEAR_SCAN(2, tty->print_cr("********* no errors found **********************************************"));
 }
 
 void LinearScan::verify_intervals() {
-  int len = interval_count();
-  bool has_error = false;
-
-  for (int i = 0; i < len; i++) {
-    Interval* i1 = interval_at(i);
-    if (i1 == NULL) continue;
-
-    i1->check_split_children();
-
-    if (i1->reg_num() != i) {
-      tty->print_cr("Interval %d is on position %d in list", i1->reg_num(), i); i1->print(); tty->cr();
-      has_error = true;
-    }
-
-    if (i1->reg_num() >= LIR_OprDesc::vreg_base && i1->type() == T_ILLEGAL) {
-      tty->print_cr("Interval %d has no type assigned", i1->reg_num()); i1->print(); tty->cr();
-      has_error = true;
-    }
-
-    if (i1->assigned_reg() == any_reg) {
-      tty->print_cr("Interval %d has no register assigned", i1->reg_num()); i1->print(); tty->cr();
-      has_error = true;
-    }
-
-    if (i1->assigned_reg() == i1->assigned_regHi()) {
-      tty->print_cr("Interval %d: low and high register equal", i1->reg_num()); i1->print(); tty->cr();
-      has_error = true;
-    }
-
-    if (!is_processed_reg_num(i1->assigned_reg())) {
-      tty->print_cr("Can not have an Interval for an ignored register"); i1->print(); tty->cr();
-      has_error = true;
-    }
-
-    if (i1->first() == Range::end()) {
-      tty->print_cr("Interval %d has no Range", i1->reg_num()); i1->print(); tty->cr();
-      has_error = true;
-    }
-
-    for (Range* r = i1->first(); r != Range::end(); r = r->next()) {
-      if (r->from() >= r->to()) {
-        tty->print_cr("Interval %d has zero length range", i1->reg_num()); i1->print(); tty->cr();
-        has_error = true;
-      }
-    }
-
-    for (int j = i + 1; j < len; j++) {
-      Interval* i2 = interval_at(j);
-      if (i2 == NULL) continue;
-
-      // special intervals that are created in MoveResolver
-      // -> ignore them because the range information has no meaning there
-      if (i1->from() == 1 && i1->to() == 2) continue;
-      if (i2->from() == 1 && i2->to() == 2) continue;
-
-      int r1 = i1->assigned_reg();
-      int r1Hi = i1->assigned_regHi();
-      int r2 = i2->assigned_reg();
-      int r2Hi = i2->assigned_regHi();
-      if (i1->intersects(i2) && (r1 == r2 || r1 == r2Hi || (r1Hi != any_reg && (r1Hi == r2 || r1Hi == r2Hi)))) {
-        tty->print_cr("Intervals %d and %d overlap and have the same register assigned", i1->reg_num(), i2->reg_num());
-        i1->print(); tty->cr();
-        i2->print(); tty->cr();
-        has_error = true;
-      }
-    }
-  }
-
-  assert(has_error == false, "register allocation invalid");
+	int len = interval_count();
+	bool has_error = false;
+
+	for (int i = 0; i < len; i++) {
+		Interval* i1 = interval_at(i);
+		if (i1 == NULL) continue;
+
+		i1->check_split_children();
+
+		if (i1->reg_num() != i) {
+			tty->print_cr("Interval %d is on position %d in list", i1->reg_num(), i); i1->print(); tty->cr();
+			has_error = true;
+		}
+
+		if (i1->reg_num() >= LIR_OprDesc::vreg_base && i1->type() == T_ILLEGAL) {
+			tty->print_cr("Interval %d has no type assigned", i1->reg_num()); i1->print(); tty->cr();
+			has_error = true;
+		}
+
+		if (i1->assigned_reg() == any_reg) {
+			tty->print_cr("Interval %d has no register assigned", i1->reg_num()); i1->print(); tty->cr();
+			has_error = true;
+		}
+
+		if (i1->assigned_reg() == i1->assigned_regHi()) {
+			tty->print_cr("Interval %d: low and high register equal", i1->reg_num()); i1->print(); tty->cr();
+			has_error = true;
+		}
+
+		if (!is_processed_reg_num(i1->assigned_reg())) {
+			tty->print_cr("Can not have an Interval for an ignored register"); i1->print(); tty->cr();
+			has_error = true;
+		}
+
+		if (i1->first() == Range::end()) {
+			tty->print_cr("Interval %d has no Range", i1->reg_num()); i1->print(); tty->cr();
+			has_error = true;
+		}
+
+		for (Range* r = i1->first(); r != Range::end(); r = r->next()) {
+			if (r->from() >= r->to()) {
+				tty->print_cr("Interval %d has zero length range", i1->reg_num()); i1->print(); tty->cr();
+				has_error = true;
+			}
+		}
+
+		for (int j = i + 1; j < len; j++) {
+			Interval* i2 = interval_at(j);
+			if (i2 == NULL) continue;
+
+			// special intervals that are created in MoveResolver
+			// -> ignore them because the range information has no meaning there
+			if (i1->from() == 1 && i1->to() == 2) continue;
+			if (i2->from() == 1 && i2->to() == 2) continue;
+
+			int r1 = i1->assigned_reg();
+			int r1Hi = i1->assigned_regHi();
+			int r2 = i2->assigned_reg();
+			int r2Hi = i2->assigned_regHi();
+			if (i1->intersects(i2) && (r1 == r2 || r1 == r2Hi || (r1Hi != any_reg && (r1Hi == r2 || r1Hi == r2Hi)))) {
+				tty->print_cr("Intervals %d and %d overlap and have the same register assigned", i1->reg_num(), i2->reg_num());
+				i1->print(); tty->cr();
+				i2->print(); tty->cr();
+				has_error = true;
+			}
+		}
+	}
+
+	assert(has_error == false, "register allocation invalid");
 }
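
The pairwise check above enforces one invariant: intervals that intersect must not share an assigned register. A tiny self-contained illustration with single half-open ranges (real Intervals carry lists of ranges, split children and a high register):

#include <cstdio>

// Simplified interval: one half-open live range and one assigned register.
struct Ival {
  int from, to;   // live in [from, to)
  int reg;        // assigned physical register
};

static bool intersects(const Ival& a, const Ival& b) {
  return a.from < b.to && b.from < a.to;
}

// The verifier's invariant: overlapping intervals need distinct registers.
static bool conflict(const Ival& a, const Ival& b) {
  return intersects(a, b) && a.reg == b.reg;
}

int main() {
  Ival i1 = {10, 20, 3};
  Ival i2 = {18, 30, 3};   // overlaps i1 and reuses register 3 -> invalid
  Ival i3 = {20, 30, 3};   // starts where i1 ends -> no conflict
  std::printf("i1/i2 conflict: %d\n", conflict(i1, i2) ? 1 : 0);
  std::printf("i1/i3 conflict: %d\n", conflict(i1, i3) ? 1 : 0);
  return 0;
}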
 
 
 void LinearScan::verify_no_oops_in_fixed_intervals() {
-  LIR_OpVisitState visitor;
-  for (int i = 0; i < block_count(); i++) {
-    BlockBegin* block = block_at(i);
-
-    LIR_OpList* instructions = block->lir()->instructions_list();
-
-    for (int j = 0; j < instructions->length(); j++) {
-      LIR_Op* op = instructions->at(j);
-      int op_id = op->id();
-
-      visitor.visit(op);
-
-      // oop-maps at calls do not contain registers, so check is not needed
-      if (!visitor.has_call()) {
-
-        for_each_visitor_mode(mode) {
-          int n = visitor.opr_count(mode);
-          for (int k = 0; k < n; k++) {
-            LIR_Opr opr = visitor.opr_at(mode, k);
-
-            if (opr->is_fixed_cpu() && opr->is_oop()) {
-              // operand is a non-virtual cpu register and contains an oop
-              TRACE_LINEAR_SCAN(4, op->print_on(tty); tty->print("checking operand "); opr->print(); tty->cr());
-
-              Interval* interval = interval_at(reg_num(opr));
-              assert(interval != NULL, "no interval");
-
-              if (mode == LIR_OpVisitState::inputMode) {
-                if (interval->to() >= op_id + 1) {
-                  assert(interval->to() < op_id + 2 ||
-                         interval->has_hole_between(op_id, op_id + 2),
-                         "oop input operand live after instruction");
-                }
-              } else if (mode == LIR_OpVisitState::outputMode) {
-                if (interval->from() <= op_id - 1) {
-                  assert(interval->has_hole_between(op_id - 1, op_id),
-                         "oop input operand live after instruction");
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
+	LIR_OpVisitState visitor;
+	for (int i = 0; i < block_count(); i++) {
+		BlockBegin* block = block_at(i);
+
+		LIR_OpList* instructions = block->lir()->instructions_list();
+
+		for (int j = 0; j < instructions->length(); j++) {
+			LIR_Op* op = instructions->at(j);
+			int op_id = op->id();
+
+			visitor.visit(op);
+
+			// oop-maps at calls do not contain registers, so check is not needed
+			if (!visitor.has_call()) {
+
+				for_each_visitor_mode(mode) {
+					int n = visitor.opr_count(mode);
+					for (int k = 0; k < n; k++) {
+						LIR_Opr opr = visitor.opr_at(mode, k);
+
+						if (opr->is_fixed_cpu() && opr->is_oop()) {
+							// operand is a non-virtual cpu register and contains an oop
+							TRACE_LINEAR_SCAN(4, op->print_on(tty); tty->print("checking operand "); opr->print(); tty->cr());
+
+							Interval* interval = interval_at(reg_num(opr));
+							assert(interval != NULL, "no interval");
+
+							if (mode == LIR_OpVisitState::inputMode) {
+								if (interval->to() >= op_id + 1) {
+									assert(interval->to() < op_id + 2 ||
+											interval->has_hole_between(op_id, op_id + 2),
+											"oop input operand live after instruction");
+								}
+							} else if (mode == LIR_OpVisitState::outputMode) {
+								if (interval->from() <= op_id - 1) {
+									assert(interval->has_hole_between(op_id - 1, op_id),
+											"oop input operand live after instruction");
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
 }
 
 
 void LinearScan::verify_constants() {
-  int num_regs = num_virtual_regs();
-  int size = live_set_size();
-  int num_blocks = block_count();
-
-  for (int i = 0; i < num_blocks; i++) {
-    BlockBegin* block = block_at(i);
-    BitMap live_at_edge = block->live_in();
-
-    // visit all registers where the live_at_edge bit is set
-    for (int r = (int)live_at_edge.get_next_one_offset(0, size); r < size; r = (int)live_at_edge.get_next_one_offset(r + 1, size)) {
-      TRACE_LINEAR_SCAN(4, tty->print("checking interval %d of block B%d", r, block->block_id()));
-
-      Value value = gen()->instruction_for_vreg(r);
-
-      assert(value != NULL, "all intervals live across block boundaries must have Value");
-      assert(value->operand()->is_register() && value->operand()->is_virtual(), "value must have virtual operand");
-      assert(value->operand()->vreg_number() == r, "register number must match");
-      // TKR assert(value->as_Constant() == NULL || value->is_pinned(), "only pinned constants can be alive across block boundaries");
-    }
-  }
+	int num_regs = num_virtual_regs();
+	int size = live_set_size();
+	int num_blocks = block_count();
+
+	for (int i = 0; i < num_blocks; i++) {
+		BlockBegin* block = block_at(i);
+		BitMap live_at_edge = block->live_in();
+
+		// visit all registers where the live_at_edge bit is set
+		for (int r = (int)live_at_edge.get_next_one_offset(0, size); r < size; r = (int)live_at_edge.get_next_one_offset(r + 1, size)) {
+			TRACE_LINEAR_SCAN(4, tty->print("checking interval %d of block B%d", r, block->block_id()));
+
+			Value value = gen()->instruction_for_vreg(r);
+
+			assert(value != NULL, "all intervals live across block boundaries must have Value");
+			assert(value->operand()->is_register() && value->operand()->is_virtual(), "value must have virtual operand");
+			assert(value->operand()->vreg_number() == r, "register number must match");
+			// TKR assert(value->as_Constant() == NULL || value->is_pinned(), "only pinned constants can be alive across block boundaries");
+		}
+	}
 }
 
 
 class RegisterVerifier: public StackObj {
- private:
-  LinearScan*   _allocator;
-  BlockList     _work_list;      // all blocks that must be processed
-  IntervalsList _saved_states;   // saved information of previous check
-
-  // simplified access to methods of LinearScan
-  Compilation*  compilation() const              { return _allocator->compilation(); }
-  Interval*     interval_at(int reg_num) const   { return _allocator->interval_at(reg_num); }
-  int           reg_num(LIR_Opr opr) const       { return _allocator->reg_num(opr); }
-
-  // currently, only registers are processed
-  int           state_size()                     { return LinearScan::nof_regs; }
-
-  // accessors
-  IntervalList* state_for_block(BlockBegin* block) { return _saved_states.at(block->block_id()); }
-  void          set_state_for_block(BlockBegin* block, IntervalList* saved_state) { _saved_states.at_put(block->block_id(), saved_state); }
-  void          add_to_work_list(BlockBegin* block) { if (!_work_list.contains(block)) _work_list.append(block); }
-
-  // helper functions
-  IntervalList* copy(IntervalList* input_state);
-  void          state_put(IntervalList* input_state, int reg, Interval* interval);
-  bool          check_state(IntervalList* input_state, int reg, Interval* interval);
-
-  void process_block(BlockBegin* block);
-  void process_xhandler(XHandler* xhandler, IntervalList* input_state);
-  void process_successor(BlockBegin* block, IntervalList* input_state);
-  void process_operations(LIR_List* ops, IntervalList* input_state);
-
- public:
-  RegisterVerifier(LinearScan* allocator)
-    : _allocator(allocator)
-    , _work_list(16)
-    , _saved_states(BlockBegin::number_of_blocks(), NULL)
-  { }
-
-  void verify(BlockBegin* start);
+	private:
+		LinearScan*   _allocator;
+		BlockList     _work_list;      // all blocks that must be processed
+		IntervalsList _saved_states;   // saved information of previous check
+
+		// simplified access to methods of LinearScan
+		Compilation*  compilation() const              { return _allocator->compilation(); }
+		Interval*     interval_at(int reg_num) const   { return _allocator->interval_at(reg_num); }
+		int           reg_num(LIR_Opr opr) const       { return _allocator->reg_num(opr); }
+
+		// currently, only registers are processed
+		int           state_size()                     { return LinearScan::nof_regs; }
+
+		// accessors
+		IntervalList* state_for_block(BlockBegin* block) { return _saved_states.at(block->block_id()); }
+		void          set_state_for_block(BlockBegin* block, IntervalList* saved_state) { _saved_states.at_put(block->block_id(), saved_state); }
+		void          add_to_work_list(BlockBegin* block) { if (!_work_list.contains(block)) _work_list.append(block); }
+
+		// helper functions
+		IntervalList* copy(IntervalList* input_state);
+		void          state_put(IntervalList* input_state, int reg, Interval* interval);
+		bool          check_state(IntervalList* input_state, int reg, Interval* interval);
+
+		void process_block(BlockBegin* block);
+		void process_xhandler(XHandler* xhandler, IntervalList* input_state);
+		void process_successor(BlockBegin* block, IntervalList* input_state);
+		void process_operations(LIR_List* ops, IntervalList* input_state);
+
+	public:
+		RegisterVerifier(LinearScan* allocator)
+			: _allocator(allocator)
+			, _work_list(16)
+			, _saved_states(BlockBegin::number_of_blocks(), NULL)
+		{ }
+
+		void verify(BlockBegin* start);
 };
 
 
 // entry function from LinearScan that starts the verification
 void LinearScan::verify_registers() {
-  RegisterVerifier verifier(this);
-  verifier.verify(block_at(0));
+	RegisterVerifier verifier(this);
+	verifier.verify(block_at(0));
 }
 
 
 void RegisterVerifier::verify(BlockBegin* start) {
-  // setup input registers (method arguments) for first block
-  IntervalList* input_state = new IntervalList(state_size(), NULL);
-  CallingConvention* args = compilation()->frame_map()->incoming_arguments();
-  for (int n = 0; n < args->length(); n++) {
-    LIR_Opr opr = args->at(n);
-    if (opr->is_register()) {
-      Interval* interval = interval_at(reg_num(opr));
-
-      if (interval->assigned_reg() < state_size()) {
-        input_state->at_put(interval->assigned_reg(), interval);
-      }
-      if (interval->assigned_regHi() != LinearScan::any_reg && interval->assigned_regHi() < state_size()) {
-        input_state->at_put(interval->assigned_regHi(), interval);
-      }
-    }
-  }
-
-  set_state_for_block(start, input_state);
-  add_to_work_list(start);
-
-  // main loop for verification
-  do {
-    BlockBegin* block = _work_list.at(0);
-    _work_list.remove_at(0);
-
-    process_block(block);
-  } while (!_work_list.is_empty());
+	// setup input registers (method arguments) for first block
+	IntervalList* input_state = new IntervalList(state_size(), NULL);
+	CallingConvention* args = compilation()->frame_map()->incoming_arguments();
+	for (int n = 0; n < args->length(); n++) {
+		LIR_Opr opr = args->at(n);
+		if (opr->is_register()) {
+			Interval* interval = interval_at(reg_num(opr));
+
+			if (interval->assigned_reg() < state_size()) {
+				input_state->at_put(interval->assigned_reg(), interval);
+			}
+			if (interval->assigned_regHi() != LinearScan::any_reg && interval->assigned_regHi() < state_size()) {
+				input_state->at_put(interval->assigned_regHi(), interval);
+			}
+		}
+	}
+
+	set_state_for_block(start, input_state);
+	add_to_work_list(start);
+
+	// main loop for verification
+	do {
+		BlockBegin* block = _work_list.at(0);
+		_work_list.remove_at(0);
+
+		process_block(block);
+	} while (!_work_list.is_empty());
 }
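
The verifier is a forward dataflow fixpoint: seed the start block with the incoming-argument state, pop blocks off a worklist, and re-queue successors whose saved state changes. A compact sketch of just the driver loop over a toy CFG (stand-in types, not BlockList/IntervalList):

#include <cstdio>
#include <deque>
#include <vector>

// Toy CFG node: an id and successor ids.
struct Block {
  int              id;
  std::vector<int> sux;
};

int main() {
  // 0 -> 1 -> 2, plus a back edge 2 -> 1.
  std::vector<Block> cfg = {{0, {1}}, {1, {2}}, {2, {1}}};
  std::vector<bool>  queued(cfg.size(), false);

  std::deque<int> work_list;
  work_list.push_back(0);               // seed with the start block
  queued[0] = true;

  while (!work_list.empty()) {
    int b = work_list.front();
    work_list.pop_front();
    std::printf("process_block B%d\n", b);
    for (size_t i = 0; i < cfg[b].sux.size(); i++) {
      int s = cfg[b].sux[i];
      if (!queued[s]) {                 // real code re-queues when state changes
        queued[s] = true;
        work_list.push_back(s);
      }
    }
  }
  return 0;
}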
 
 void RegisterVerifier::process_block(BlockBegin* block) {
-  TRACE_LINEAR_SCAN(2, tty->cr(); tty->print_cr("process_block B%d", block->block_id()));
-
-  // must copy state because it is modified
-  IntervalList* input_state = copy(state_for_block(block));
-
-  if (TraceLinearScanLevel >= 4) {
-    tty->print_cr("Input-State of intervals:");
-    tty->print("    ");
-    for (int i = 0; i < state_size(); i++) {
-      if (input_state->at(i) != NULL) {
-        tty->print(" %4d", input_state->at(i)->reg_num());
-      } else {
-        tty->print("   __");
-      }
-    }
-    tty->cr();
-    tty->cr();
-  }
-
-  // process all operations of the block
-  process_operations(block->lir(), input_state);
-
-  // iterate all successors
-  for (int i = 0; i < block->number_of_sux(); i++) {
-    process_successor(block->sux_at(i), input_state);
-  }
+	TRACE_LINEAR_SCAN(2, tty->cr(); tty->print_cr("process_block B%d", block->block_id()));
+
+	// must copy state because it is modified
+	IntervalList* input_state = copy(state_for_block(block));
+
+	if (TraceLinearScanLevel >= 4) {
+		tty->print_cr("Input-State of intervals:");
+		tty->print("    ");
+		for (int i = 0; i < state_size(); i++) {
+			if (input_state->at(i) != NULL) {
+				tty->print(" %4d", input_state->at(i)->reg_num());
+			} else {
+				tty->print("   __");
+			}
+		}
+		tty->cr();
+		tty->cr();
+	}
+
+	// process all operations of the block
+	process_operations(block->lir(), input_state);
+
+	// iterate all successors
+	for (int i = 0; i < block->number_of_sux(); i++) {
+		process_successor(block->sux_at(i), input_state);
+	}
 }
 
 void RegisterVerifier::process_xhandler(XHandler* xhandler, IntervalList* input_state) {
-  TRACE_LINEAR_SCAN(2, tty->print_cr("process_xhandler B%d", xhandler->entry_block()->block_id()));
-
-  // must copy state because it is modified
-  input_state = copy(input_state);
-
-  if (xhandler->entry_code() != NULL) {
-    process_operations(xhandler->entry_code(), input_state);
-  }
-  process_successor(xhandler->entry_block(), input_state);
+	TRACE_LINEAR_SCAN(2, tty->print_cr("process_xhandler B%d", xhandler->entry_block()->block_id()));
+
+	// must copy state because it is modified
+	input_state = copy(input_state);
+
+	if (xhandler->entry_code() != NULL) {
+		process_operations(xhandler->entry_code(), input_state);
+	}
+	process_successor(xhandler->entry_block(), input_state);
 }
 
 void RegisterVerifier::process_successor(BlockBegin* block, IntervalList* input_state) {
-  IntervalList* saved_state = state_for_block(block);
-
-  if (saved_state != NULL) {
-    // this block was already processed before.
-    // check if new input_state is consistent with saved_state
-
-    bool saved_state_correct = true;
-    for (int i = 0; i < state_size(); i++) {
-      if (input_state->at(i) != saved_state->at(i)) {
-        // current input_state and previous saved_state assume a different
-        // interval in this register -> assume that this register is invalid
-        if (saved_state->at(i) != NULL) {
-          // invalidate old calculation only if it assumed that
-          // register was valid. when the register was already invalid,
-          // then the old calculation was correct.
-          saved_state_correct = false;
-          saved_state->at_put(i, NULL);
-
-          TRACE_LINEAR_SCAN(4, tty->print_cr("process_successor B%d: invalidating slot %d", block->block_id(), i));
-        }
-      }
-    }
-
-    if (saved_state_correct) {
-      // already processed block with correct input_state
-      TRACE_LINEAR_SCAN(2, tty->print_cr("process_successor B%d: previous visit already correct", block->block_id()));
-    } else {
-      // must re-visit this block
-      TRACE_LINEAR_SCAN(2, tty->print_cr("process_successor B%d: must re-visit because input state changed", block->block_id()));
-      add_to_work_list(block);
-    }
-
-  } else {
-    // block was not processed before, so set initial input_state
-    TRACE_LINEAR_SCAN(2, tty->print_cr("process_successor B%d: initial visit", block->block_id()));
-
-    set_state_for_block(block, copy(input_state));
-    add_to_work_list(block);
-  }
+	IntervalList* saved_state = state_for_block(block);
+
+	if (saved_state != NULL) {
+		// this block was already processed before.
+		// check if new input_state is consistent with saved_state
+
+		bool saved_state_correct = true;
+		for (int i = 0; i < state_size(); i++) {
+			if (input_state->at(i) != saved_state->at(i)) {
+				// current input_state and previous saved_state assume a different
+				// interval in this register -> assume that this register is invalid
+				if (saved_state->at(i) != NULL) {
+					// invalidate old calculation only if it assumed that
+					// register was valid. when the register was already invalid,
+					// then the old calculation was correct.
+					saved_state_correct = false;
+					saved_state->at_put(i, NULL);
+
+					TRACE_LINEAR_SCAN(4, tty->print_cr("process_successor B%d: invalidating slot %d", block->block_id(), i));
+				}
+			}
+		}
+
+		if (saved_state_correct) {
+			// already processed block with correct input_state
+			TRACE_LINEAR_SCAN(2, tty->print_cr("process_successor B%d: previous visit already correct", block->block_id()));
+		} else {
+			// must re-visit this block
+			TRACE_LINEAR_SCAN(2, tty->print_cr("process_successor B%d: must re-visit because input state changed", block->block_id()));
+			add_to_work_list(block);
+		}
+
+	} else {
+		// block was not processed before, so set initial input_state
+		TRACE_LINEAR_SCAN(2, tty->print_cr("process_successor B%d: initial visit", block->block_id()));
+
+		set_state_for_block(block, copy(input_state));
+		add_to_work_list(block);
+	}
 }
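
The merge rule above is conservative: wherever the incoming and saved states disagree, the slot is downgraded to unknown (NULL), and the block is re-queued only if a previously known slot had to be invalidated. A small sketch of that rule, with 0 standing in for NULL/unknown:

#include <cstdio>
#include <vector>

// Merge the incoming state into the saved state. Returns true if the saved
// state changed, i.e. the successor must be re-visited.
static bool merge(std::vector<int>& saved, const std::vector<int>& incoming) {
  bool changed = false;
  for (size_t i = 0; i < saved.size(); i++) {
    if (incoming[i] != saved[i] && saved[i] != 0) {
      saved[i] = 0;      // disagreement: slot becomes unknown
      changed = true;    // an earlier assumption was invalidated
    }
  }
  return changed;
}

int main() {
  std::vector<int> saved    = {7, 0, 9};   // slot 1 is already unknown
  std::vector<int> incoming = {7, 5, 4};   // disagrees on slots 1 and 2
  bool revisit = merge(saved, incoming);
  std::printf("re-visit: %d, saved = {%d, %d, %d}\n",
              revisit ? 1 : 0, saved[0], saved[1], saved[2]);
  return 0;
}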
 
 
 IntervalList* RegisterVerifier::copy(IntervalList* input_state) {
-  IntervalList* copy_state = new IntervalList(input_state->length());
-  copy_state->push_all(input_state);
-  return copy_state;
+	IntervalList* copy_state = new IntervalList(input_state->length());
+	copy_state->push_all(input_state);
+	return copy_state;
 }
 
 void RegisterVerifier::state_put(IntervalList* input_state, int reg, Interval* interval) {
-  if (reg != LinearScan::any_reg && reg < state_size()) {
-    if (interval != NULL) {
-      TRACE_LINEAR_SCAN(4, tty->print_cr("        reg[%d] = %d", reg, interval->reg_num()));
-    } else if (input_state->at(reg) != NULL) {
-      TRACE_LINEAR_SCAN(4, tty->print_cr("        reg[%d] = NULL", reg));
-    }
-
-    input_state->at_put(reg, interval);
-  }
+	if (reg != LinearScan::any_reg && reg < state_size()) {
+		if (interval != NULL) {
+			TRACE_LINEAR_SCAN(4, tty->print_cr("        reg[%d] = %d", reg, interval->reg_num()));
+		} else if (input_state->at(reg) != NULL) {
+			TRACE_LINEAR_SCAN(4, tty->print_cr("        reg[%d] = NULL", reg));
+		}
+
+		input_state->at_put(reg, interval);
+	}
 }
 
 bool RegisterVerifier::check_state(IntervalList* input_state, int reg, Interval* interval) {
-  if (reg != LinearScan::any_reg && reg < state_size()) {
-    if (input_state->at(reg) != interval) {
-      tty->print_cr("!! Error in register allocation: register %d does not contain interval %d", reg, interval->reg_num());
-      return true;
-    }
-  }
-  return false;
+	if (reg != LinearScan::any_reg && reg < state_size()) {
+		if (input_state->at(reg) != interval) {
+			tty->print_cr("!! Error in register allocation: register %d does not contain interval %d", reg, interval->reg_num());
+			return true;
+		}
+	}
+	return false;
 }
 
 void RegisterVerifier::process_operations(LIR_List* ops, IntervalList* input_state) {
-  // visit all instructions of the block
-  LIR_OpVisitState visitor;
-  bool has_error = false;
-
-  for (int i = 0; i < ops->length(); i++) {
-    LIR_Op* op = ops->at(i);
-    visitor.visit(op);
-
-    TRACE_LINEAR_SCAN(4, op->print_on(tty));
-
-    // check if input operands are correct
-    int j;
-    int n = visitor.opr_count(LIR_OpVisitState::inputMode);
-    for (j = 0; j < n; j++) {
-      LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::inputMode, j);
-      if (opr->is_register() && LinearScan::is_processed_reg_num(reg_num(opr))) {
-        Interval* interval = interval_at(reg_num(opr));
-        if (op->id() != -1) {
-          interval = interval->split_child_at_op_id(op->id(), LIR_OpVisitState::inputMode);
-        }
-
-        has_error |= check_state(input_state, interval->assigned_reg(),   interval->split_parent());
-        has_error |= check_state(input_state, interval->assigned_regHi(), interval->split_parent());
-
-        // When an operand is marked with is_last_use, then the fpu stack allocator
-        // removes the register from the fpu stack -> the register contains no value
-        if (opr->is_last_use()) {
-          state_put(input_state, interval->assigned_reg(),   NULL);
-          state_put(input_state, interval->assigned_regHi(), NULL);
-        }
-      }
-    }
-
-    // invalidate all caller save registers at calls
-    if (visitor.has_call()) {
-      for (j = 0; j < FrameMap::nof_caller_save_cpu_regs; j++) {
-        state_put(input_state, reg_num(FrameMap::caller_save_cpu_reg_at(j)), NULL);
-      }
-      for (j = 0; j < FrameMap::nof_caller_save_fpu_regs; j++) {
-        state_put(input_state, reg_num(FrameMap::caller_save_fpu_reg_at(j)), NULL);
-      }
+	// visit all instructions of the block
+	LIR_OpVisitState visitor;
+	bool has_error = false;
+
+	for (int i = 0; i < ops->length(); i++) {
+		LIR_Op* op = ops->at(i);
+		visitor.visit(op);
+
+		TRACE_LINEAR_SCAN(4, op->print_on(tty));
+
+		// check if input operands are correct
+		int j;
+		int n = visitor.opr_count(LIR_OpVisitState::inputMode);
+		for (j = 0; j < n; j++) {
+			LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::inputMode, j);
+			if (opr->is_register() && LinearScan::is_processed_reg_num(reg_num(opr))) {
+				Interval* interval = interval_at(reg_num(opr));
+				if (op->id() != -1) {
+					interval = interval->split_child_at_op_id(op->id(), LIR_OpVisitState::inputMode);
+				}
+
+				has_error |= check_state(input_state, interval->assigned_reg(),   interval->split_parent());
+				has_error |= check_state(input_state, interval->assigned_regHi(), interval->split_parent());
+
+				// When an operand is marked with is_last_use, then the fpu stack allocator
+				// removes the register from the fpu stack -> the register contains no value
+				if (opr->is_last_use()) {
+					state_put(input_state, interval->assigned_reg(),   NULL);
+					state_put(input_state, interval->assigned_regHi(), NULL);
+				}
+			}
+		}
+
+		// invalidate all caller save registers at calls
+		if (visitor.has_call()) {
+			for (j = 0; j < FrameMap::nof_caller_save_cpu_regs; j++) {
+				state_put(input_state, reg_num(FrameMap::caller_save_cpu_reg_at(j)), NULL);
+			}
+			for (j = 0; j < FrameMap::nof_caller_save_fpu_regs; j++) {
+				state_put(input_state, reg_num(FrameMap::caller_save_fpu_reg_at(j)), NULL);
+			}
 
 #ifdef X86
-      for (j = 0; j < FrameMap::nof_caller_save_xmm_regs; j++) {
-        state_put(input_state, reg_num(FrameMap::caller_save_xmm_reg_at(j)), NULL);
-      }
+			for (j = 0; j < FrameMap::nof_caller_save_xmm_regs; j++) {
+				state_put(input_state, reg_num(FrameMap::caller_save_xmm_reg_at(j)), NULL);
+			}
 #endif
-    }
-
-    // process xhandler before output and temp operands
-    XHandlers* xhandlers = visitor.all_xhandler();
-    n = xhandlers->length();
-    for (int k = 0; k < n; k++) {
-      process_xhandler(xhandlers->handler_at(k), input_state);
-    }
-
-    // set temp operands (some operations use temp operands also as output operands, so can't set them NULL)
-    n = visitor.opr_count(LIR_OpVisitState::tempMode);
-    for (j = 0; j < n; j++) {
-      LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::tempMode, j);
-      if (opr->is_register() && LinearScan::is_processed_reg_num(reg_num(opr))) {
-        Interval* interval = interval_at(reg_num(opr));
-        if (op->id() != -1) {
-          interval = interval->split_child_at_op_id(op->id(), LIR_OpVisitState::tempMode);
-        }
-
-        state_put(input_state, interval->assigned_reg(),   interval->split_parent());
-        state_put(input_state, interval->assigned_regHi(), interval->split_parent());
-      }
-    }
-
-    // set output operands
-    n = visitor.opr_count(LIR_OpVisitState::outputMode);
-    for (j = 0; j < n; j++) {
-      LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::outputMode, j);
-      if (opr->is_register() && LinearScan::is_processed_reg_num(reg_num(opr))) {
-        Interval* interval = interval_at(reg_num(opr));
-        if (op->id() != -1) {
-          interval = interval->split_child_at_op_id(op->id(), LIR_OpVisitState::outputMode);
-        }
-
-        state_put(input_state, interval->assigned_reg(),   interval->split_parent());
-        state_put(input_state, interval->assigned_regHi(), interval->split_parent());
-      }
-    }
-  }
-  assert(has_error == false, "Error in register allocation");
+		}
+
+		// process xhandler before output and temp operands
+		XHandlers* xhandlers = visitor.all_xhandler();
+		n = xhandlers->length();
+		for (int k = 0; k < n; k++) {
+			process_xhandler(xhandlers->handler_at(k), input_state);
+		}
+
+		// set temp operands (some operations use temp operands also as output operands, so can't set them NULL)
+		n = visitor.opr_count(LIR_OpVisitState::tempMode);
+		for (j = 0; j < n; j++) {
+			LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::tempMode, j);
+			if (opr->is_register() && LinearScan::is_processed_reg_num(reg_num(opr))) {
+				Interval* interval = interval_at(reg_num(opr));
+				if (op->id() != -1) {
+					interval = interval->split_child_at_op_id(op->id(), LIR_OpVisitState::tempMode);
+				}
+
+				state_put(input_state, interval->assigned_reg(),   interval->split_parent());
+				state_put(input_state, interval->assigned_regHi(), interval->split_parent());
+			}
+		}
+
+		// set output operands
+		n = visitor.opr_count(LIR_OpVisitState::outputMode);
+		for (j = 0; j < n; j++) {
+			LIR_Opr opr = visitor.opr_at(LIR_OpVisitState::outputMode, j);
+			if (opr->is_register() && LinearScan::is_processed_reg_num(reg_num(opr))) {
+				Interval* interval = interval_at(reg_num(opr));
+				if (op->id() != -1) {
+					interval = interval->split_child_at_op_id(op->id(), LIR_OpVisitState::outputMode);
+				}
+
+				state_put(input_state, interval->assigned_reg(),   interval->split_parent());
+				state_put(input_state, interval->assigned_regHi(), interval->split_parent());
+			}
+		}
+	}
+	assert(has_error == false, "Error in register allocation");
 }
 
 #endif // ASSERT
@@ -3505,93 +3510,93 @@
 // **** Implementation of MoveResolver ******************************
 
 MoveResolver::MoveResolver(LinearScan* allocator) :
-  _allocator(allocator),
-  _multiple_reads_allowed(false),
-  _mapping_from(8),
-  _mapping_from_opr(8),
-  _mapping_to(8),
-  _insert_list(NULL),
-  _insert_idx(-1),
-  _insertion_buffer()
+	_allocator(allocator),
+	_multiple_reads_allowed(false),
+	_mapping_from(8),
+	_mapping_from_opr(8),
+	_mapping_to(8),
+	_insert_list(NULL),
+	_insert_idx(-1),
+	_insertion_buffer()
 {
-  for (int i = 0; i < LinearScan::nof_regs; i++) {
-    _register_blocked[i] = 0;
-  }
-  DEBUG_ONLY(check_empty());
+	for (int i = 0; i < LinearScan::nof_regs; i++) {
+		_register_blocked[i] = 0;
+	}
+	DEBUG_ONLY(check_empty());
 }
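
The _register_blocked array initialized above acts as a per-register count of pending readers; block_registers/unblock_registers further down bump it up and down around each queued move. A minimal sketch of that bookkeeping (the register count and helper names here are assumptions):

#include <cstdio>

static const int kNofRegs = 8;            // assumed physical register count
static int register_blocked[kNofRegs];    // zero before and after processing

static void block(int reg)   { if (reg >= 0 && reg < kNofRegs) register_blocked[reg]++; }
static void unblock(int reg) { if (reg >= 0 && reg < kNofRegs) register_blocked[reg]--; }

int main() {
  block(3);                               // a queued move still reads r3
  block(3);                               // a second reader (multiple reads allowed)
  std::printf("r3 blocked: %d\n", register_blocked[3]);   // 2
  unblock(3);
  unblock(3);
  std::printf("r3 blocked: %d\n", register_blocked[3]);   // back to 0
  return 0;
}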
 
 
 #ifdef ASSERT
 
 void MoveResolver::check_empty() {
-  assert(_mapping_from.length() == 0 && _mapping_from_opr.length() == 0 && _mapping_to.length() == 0, "list must be empty before and after processing");
-  for (int i = 0; i < LinearScan::nof_regs; i++) {
-    assert(register_blocked(i) == 0, "register map must be empty before and after processing");
-  }
-  assert(_multiple_reads_allowed == false, "must have default value");
+	assert(_mapping_from.length() == 0 && _mapping_from_opr.length() == 0 && _mapping_to.length() == 0, "list must be empty before and after processing");
+	for (int i = 0; i < LinearScan::nof_regs; i++) {
+		assert(register_blocked(i) == 0, "register map must be empty before and after processing");
+	}
+	assert(_multiple_reads_allowed == false, "must have default value");
 }
 
 void MoveResolver::verify_before_resolve() {
-  assert(_mapping_from.length() == _mapping_from_opr.length(), "length must be equal");
-  assert(_mapping_from.length() == _mapping_to.length(), "length must be equal");
-  assert(_insert_list != NULL && _insert_idx != -1, "insert position not set");
-
-  int i, j;
-  if (!_multiple_reads_allowed) {
-    for (i = 0; i < _mapping_from.length(); i++) {
-      for (j = i + 1; j < _mapping_from.length(); j++) {
-        assert(_mapping_from.at(i) == NULL || _mapping_from.at(i) != _mapping_from.at(j), "cannot read from same interval twice");
-      }
-    }
-  }
-
-  for (i = 0; i < _mapping_to.length(); i++) {
-    for (j = i + 1; j < _mapping_to.length(); j++) {
-      assert(_mapping_to.at(i) != _mapping_to.at(j), "cannot write to same interval twice");
-    }
-  }
-
-
-  BitMap used_regs(LinearScan::nof_regs + allocator()->frame_map()->argcount() + allocator()->max_spills());
-  used_regs.clear();
-  if (!_multiple_reads_allowed) {
-    for (i = 0; i < _mapping_from.length(); i++) {
-      Interval* it = _mapping_from.at(i);
-      if (it != NULL) {
-        assert(!used_regs.at(it->assigned_reg()), "cannot read from same register twice");
-        used_regs.set_bit(it->assigned_reg());
-
-        if (it->assigned_regHi() != LinearScan::any_reg) {
-          assert(!used_regs.at(it->assigned_regHi()), "cannot read from same register twice");
-          used_regs.set_bit(it->assigned_regHi());
-        }
-      }
-    }
-  }
-
-  used_regs.clear();
-  for (i = 0; i < _mapping_to.length(); i++) {
-    Interval* it = _mapping_to.at(i);
-    assert(!used_regs.at(it->assigned_reg()), "cannot write to same register twice");
-    used_regs.set_bit(it->assigned_reg());
-
-    if (it->assigned_regHi() != LinearScan::any_reg) {
-      assert(!used_regs.at(it->assigned_regHi()), "cannot write to same register twice");
-      used_regs.set_bit(it->assigned_regHi());
-    }
-  }
-
-  used_regs.clear();
-  for (i = 0; i < _mapping_from.length(); i++) {
-    Interval* it = _mapping_from.at(i);
-    if (it != NULL && it->assigned_reg() >= LinearScan::nof_regs) {
-      used_regs.set_bit(it->assigned_reg());
-    }
-  }
-  for (i = 0; i < _mapping_to.length(); i++) {
-    Interval* it = _mapping_to.at(i);
-    assert(!used_regs.at(it->assigned_reg()) || it->assigned_reg() == _mapping_from.at(i)->assigned_reg(), "stack slots used in _mapping_from must be disjoint to _mapping_to");
-  }
+	assert(_mapping_from.length() == _mapping_from_opr.length(), "length must be equal");
+	assert(_mapping_from.length() == _mapping_to.length(), "length must be equal");
+	assert(_insert_list != NULL && _insert_idx != -1, "insert position not set");
+
+	int i, j;
+	if (!_multiple_reads_allowed) {
+		for (i = 0; i < _mapping_from.length(); i++) {
+			for (j = i + 1; j < _mapping_from.length(); j++) {
+				assert(_mapping_from.at(i) == NULL || _mapping_from.at(i) != _mapping_from.at(j), "cannot read from same interval twice");
+			}
+		}
+	}
+
+	for (i = 0; i < _mapping_to.length(); i++) {
+		for (j = i + 1; j < _mapping_to.length(); j++) {
+			assert(_mapping_to.at(i) != _mapping_to.at(j), "cannot write to same interval twice");
+		}
+	}
+
+
+	BitMap used_regs(LinearScan::nof_regs + allocator()->frame_map()->argcount() + allocator()->max_spills());
+	used_regs.clear();
+	if (!_multiple_reads_allowed) {
+		for (i = 0; i < _mapping_from.length(); i++) {
+			Interval* it = _mapping_from.at(i);
+			if (it != NULL) {
+				assert(!used_regs.at(it->assigned_reg()), "cannot read from same register twice");
+				used_regs.set_bit(it->assigned_reg());
+
+				if (it->assigned_regHi() != LinearScan::any_reg) {
+					assert(!used_regs.at(it->assigned_regHi()), "cannot read from same register twice");
+					used_regs.set_bit(it->assigned_regHi());
+				}
+			}
+		}
+	}
+
+	used_regs.clear();
+	for (i = 0; i < _mapping_to.length(); i++) {
+		Interval* it = _mapping_to.at(i);
+		assert(!used_regs.at(it->assigned_reg()), "cannot write to same register twice");
+		used_regs.set_bit(it->assigned_reg());
+
+		if (it->assigned_regHi() != LinearScan::any_reg) {
+			assert(!used_regs.at(it->assigned_regHi()), "cannot write to same register twice");
+			used_regs.set_bit(it->assigned_regHi());
+		}
+	}
+
+	used_regs.clear();
+	for (i = 0; i < _mapping_from.length(); i++) {
+		Interval* it = _mapping_from.at(i);
+		if (it != NULL && it->assigned_reg() >= LinearScan::nof_regs) {
+			used_regs.set_bit(it->assigned_reg());
+		}
+	}
+	for (i = 0; i < _mapping_to.length(); i++) {
+		Interval* it = _mapping_to.at(i);
+		assert(!used_regs.at(it->assigned_reg()) || it->assigned_reg() == _mapping_from.at(i)->assigned_reg(), "stack slots used in _mapping_from must be disjoint to _mapping_to");
+	}
 }
 
 #endif // ASSERT
@@ -3599,241 +3604,241 @@
 
 // mark assigned_reg and assigned_regHi of the interval as blocked
 void MoveResolver::block_registers(Interval* it) {
-  int reg = it->assigned_reg();
-  if (reg < LinearScan::nof_regs) {
-    assert(_multiple_reads_allowed || register_blocked(reg) == 0, "register already marked as used");
-    set_register_blocked(reg, 1);
-  }
-  reg = it->assigned_regHi();
-  if (reg != LinearScan::any_reg && reg < LinearScan::nof_regs) {
-    assert(_multiple_reads_allowed || register_blocked(reg) == 0, "register already marked as used");
-    set_register_blocked(reg, 1);
-  }
+	int reg = it->assigned_reg();
+	if (reg < LinearScan::nof_regs) {
+		assert(_multiple_reads_allowed || register_blocked(reg) == 0, "register already marked as used");
+		set_register_blocked(reg, 1);
+	}
+	reg = it->assigned_regHi();
+	if (reg != LinearScan::any_reg && reg < LinearScan::nof_regs) {
+		assert(_multiple_reads_allowed || register_blocked(reg) == 0, "register already marked as used");
+		set_register_blocked(reg, 1);
+	}
 }
 
 // mark assigned_reg and assigned_regHi of the interval as unblocked
 void MoveResolver::unblock_registers(Interval* it) {
-  int reg = it->assigned_reg();
-  if (reg < LinearScan::nof_regs) {
-    assert(register_blocked(reg) > 0, "register already marked as unused");
-    set_register_blocked(reg, -1);
-  }
-  reg = it->assigned_regHi();
-  if (reg != LinearScan::any_reg && reg < LinearScan::nof_regs) {
-    assert(register_blocked(reg) > 0, "register already marked as unused");
-    set_register_blocked(reg, -1);
-  }
+	int reg = it->assigned_reg();
+	if (reg < LinearScan::nof_regs) {
+		assert(register_blocked(reg) > 0, "register already marked as unused");
+		set_register_blocked(reg, -1);
+	}
+	reg = it->assigned_regHi();
+	if (reg != LinearScan::any_reg && reg < LinearScan::nof_regs) {
+		assert(register_blocked(reg) > 0, "register already marked as unused");
+		set_register_blocked(reg, -1);
+	}
 }
 
 // check if assigned_reg and assigned_regHi of the to-interval are not blocked (or only blocked by from)
 bool MoveResolver::save_to_process_move(Interval* from, Interval* to) {
-  int from_reg = -1;
-  int from_regHi = -1;
-  if (from != NULL) {
-    from_reg = from->assigned_reg();
-    from_regHi = from->assigned_regHi();
-  }
-
-  int reg = to->assigned_reg();
-  if (reg < LinearScan::nof_regs) {
-    if (register_blocked(reg) > 1 || (register_blocked(reg) == 1 && reg != from_reg && reg != from_regHi)) {
-      return false;
-    }
-  }
-  reg = to->assigned_regHi();
-  if (reg != LinearScan::any_reg && reg < LinearScan::nof_regs) {
-    if (register_blocked(reg) > 1 || (register_blocked(reg) == 1 && reg != from_reg && reg != from_regHi)) {
-      return false;
-    }
-  }
-
-  return true;
+	int from_reg = -1;
+	int from_regHi = -1;
+	if (from != NULL) {
+		from_reg = from->assigned_reg();
+		from_regHi = from->assigned_regHi();
+	}
+
+	int reg = to->assigned_reg();
+	if (reg < LinearScan::nof_regs) {
+		if (register_blocked(reg) > 1 || (register_blocked(reg) == 1 && reg != from_reg && reg != from_regHi)) {
+			return false;
+		}
+	}
+	reg = to->assigned_regHi();
+	if (reg != LinearScan::any_reg && reg < LinearScan::nof_regs) {
+		if (register_blocked(reg) > 1 || (register_blocked(reg) == 1 && reg != from_reg && reg != from_regHi)) {
+			return false;
+		}
+	}
+
+	return true;
 }
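
block_registers(), unblock_registers() and save_to_process_move() above keep a per-register use count so the resolver knows whether a move's destination is still needed as a source (a destination blocked only by the move's own source is still safe to overwrite). Below is a minimal standalone sketch of that bookkeeping; RegState and safe_to_process are illustrative names, not code from this patch.

#include <cassert>

// Simplified stand-in for MoveResolver's blocked-register bookkeeping.
struct RegState {
  static const int nof_regs = 32;
  int blocked[nof_regs];

  RegState() { for (int i = 0; i < nof_regs; i++) blocked[i] = 0; }

  void block(int reg)   { blocked[reg]++; }   // reg is the source of a pending move
  void unblock(int reg) { assert(blocked[reg] > 0); blocked[reg]--; }

  // A move src -> dst is safe to emit if dst is not read by any pending move,
  // or is read only by this move's own source.
  bool safe_to_process(int src, int dst) const {
    return blocked[dst] == 0 || (blocked[dst] == 1 && dst == src);
  }
};

int main() {
  RegState s;
  s.block(1);                        // pending: r1 -> r2
  s.block(2);                        // pending: r2 -> r1 (a swap cycle)
  assert(!s.safe_to_process(1, 2));  // r2 is still read by the other move
  assert(!s.safe_to_process(2, 1));  // r1 is still read too -> cycle detected
  s.unblock(2);                      // pretend r2's value was saved to a spill slot
  assert(s.safe_to_process(1, 2));   // now r1 -> r2 can be emitted
  return 0;
}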
 
 
 void MoveResolver::create_insertion_buffer(LIR_List* list) {
-  assert(!_insertion_buffer.initialized(), "overwriting existing buffer");
-  _insertion_buffer.init(list);
+	assert(!_insertion_buffer.initialized(), "overwriting existing buffer");
+	_insertion_buffer.init(list);
 }
 
 void MoveResolver::append_insertion_buffer() {
-  if (_insertion_buffer.initialized()) {
-    _insertion_buffer.lir_list()->append(&_insertion_buffer);
-  }
-  assert(!_insertion_buffer.initialized(), "must be uninitialized now");
-
-  _insert_list = NULL;
-  _insert_idx = -1;
+	if (_insertion_buffer.initialized()) {
+		_insertion_buffer.lir_list()->append(&_insertion_buffer);
+	}
+	assert(!_insertion_buffer.initialized(), "must be uninitialized now");
+
+	_insert_list = NULL;
+	_insert_idx = -1;
 }
 
 void MoveResolver::insert_move(Interval* from_interval, Interval* to_interval) {
-  assert(from_interval->reg_num() != to_interval->reg_num(), "from and to interval equal");
-  assert(from_interval->type() == to_interval->type(), "move between different types");
-  assert(_insert_list != NULL && _insert_idx != -1, "must setup insert position first");
-  assert(_insertion_buffer.lir_list() == _insert_list, "wrong insertion buffer");
-
-  LIR_Opr from_opr = LIR_OprFact::virtual_register(from_interval->reg_num(), from_interval->type());
-  LIR_Opr to_opr = LIR_OprFact::virtual_register(to_interval->reg_num(), to_interval->type());
-
-  if (!_multiple_reads_allowed) {
-    // the last_use flag is an optimization for FPU stack allocation. When the same
-    // input interval is used in more than one move, then it is too difficult to determine
-    // if this move is really the last use.
-    from_opr = from_opr->make_last_use();
-  }
-  _insertion_buffer.move(_insert_idx, from_opr, to_opr);
-
-  TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: inserted move from register %d (%d, %d) to %d (%d, %d)", from_interval->reg_num(), from_interval->assigned_reg(), from_interval->assigned_regHi(), to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
+	assert(from_interval->reg_num() != to_interval->reg_num(), "from and to interval equal");
+	assert(from_interval->type() == to_interval->type(), "move between different types");
+	assert(_insert_list != NULL && _insert_idx != -1, "must setup insert position first");
+	assert(_insertion_buffer.lir_list() == _insert_list, "wrong insertion buffer");
+
+	LIR_Opr from_opr = LIR_OprFact::virtual_register(from_interval->reg_num(), from_interval->type());
+	LIR_Opr to_opr = LIR_OprFact::virtual_register(to_interval->reg_num(), to_interval->type());
+
+	if (!_multiple_reads_allowed) {
+		// the last_use flag is an optimization for FPU stack allocation. When the same
+		// input interval is used in more than one move, it is too difficult to determine
+		// whether this move is really the last use.
+		from_opr = from_opr->make_last_use();
+	}
+	_insertion_buffer.move(_insert_idx, from_opr, to_opr);
+
+	TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: inserted move from register %d (%d, %d) to %d (%d, %d)", from_interval->reg_num(), from_interval->assigned_reg(), from_interval->assigned_regHi(), to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
 }
 
 void MoveResolver::insert_move(LIR_Opr from_opr, Interval* to_interval) {
-  assert(from_opr->type() == to_interval->type(), "move between different types");
-  assert(_insert_list != NULL && _insert_idx != -1, "must setup insert position first");
-  assert(_insertion_buffer.lir_list() == _insert_list, "wrong insertion buffer");
-
-  LIR_Opr to_opr = LIR_OprFact::virtual_register(to_interval->reg_num(), to_interval->type());
-  _insertion_buffer.move(_insert_idx, from_opr, to_opr);
-
-  TRACE_LINEAR_SCAN(4, tty->print("MoveResolver: inserted move from constant "); from_opr->print(); tty->print_cr("  to %d (%d, %d)", to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
+	assert(from_opr->type() == to_interval->type(), "move between different types");
+	assert(_insert_list != NULL && _insert_idx != -1, "must setup insert position first");
+	assert(_insertion_buffer.lir_list() == _insert_list, "wrong insertion buffer");
+
+	LIR_Opr to_opr = LIR_OprFact::virtual_register(to_interval->reg_num(), to_interval->type());
+	_insertion_buffer.move(_insert_idx, from_opr, to_opr);
+
+	TRACE_LINEAR_SCAN(4, tty->print("MoveResolver: inserted move from constant "); from_opr->print(); tty->print_cr("  to %d (%d, %d)", to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
 }
 
 
 void MoveResolver::resolve_mappings() {
-  TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: resolving mappings for Block B%d, index %d", _insert_list->block() != NULL ? _insert_list->block()->block_id() : -1, _insert_idx));
-  DEBUG_ONLY(verify_before_resolve());
-
-  // Block all registers that are used as input operands of a move.
-  // When a register is blocked, no move to this register is emitted.
-  // This is necessary for detecting cycles in moves.
-  int i;
-  for (i = _mapping_from.length() - 1; i >= 0; i--) {
-    Interval* from_interval = _mapping_from.at(i);
-    if (from_interval != NULL) {
-      block_registers(from_interval);
-    }
-  }
-
-  int spill_candidate = -1;
-  while (_mapping_from.length() > 0) {
-    bool processed_interval = false;
-
-    for (i = _mapping_from.length() - 1; i >= 0; i--) {
-      Interval* from_interval = _mapping_from.at(i);
-      Interval* to_interval = _mapping_to.at(i);
-
-      if (save_to_process_move(from_interval, to_interval)) {
-        // this inverval can be processed because target is free
-        if (from_interval != NULL) {
-          insert_move(from_interval, to_interval);
-          unblock_registers(from_interval);
-        } else {
-          insert_move(_mapping_from_opr.at(i), to_interval);
-        }
-        _mapping_from.remove_at(i);
-        _mapping_from_opr.remove_at(i);
-        _mapping_to.remove_at(i);
-
-        processed_interval = true;
-      } else if (from_interval != NULL && from_interval->assigned_reg() < LinearScan::nof_regs) {
-        // this interval cannot be processed now because target is not free
-        // it starts in a register, so it is a possible candidate for spilling
-        spill_candidate = i;
-      }
-    }
-
-    if (!processed_interval) {
-      // no move could be processed because there is a cycle in the move list
-      // (e.g. r1 -> r2, r2 -> r1), so one interval must be spilled to memory
-      assert(spill_candidate != -1, "no interval in register for spilling found");
-
-      // create a new spill interval and assign a stack slot to it
-      Interval* from_interval = _mapping_from.at(spill_candidate);
-      Interval* spill_interval = new Interval(-1);
-      spill_interval->set_type(from_interval->type());
-
-      // add a dummy range because real position is difficult to calculate
-      // Note: this range is a special case when the integrity of the allocation is checked
-      spill_interval->add_range(1, 2);
-
-      //       do not allocate a new spill slot for temporary interval, but
-      //       use spill slot assigned to from_interval. Otherwise moves from
-      //       one stack slot to another can happen (not allowed by LIR_Assembler
-      int spill_slot = from_interval->canonical_spill_slot();
-      if (spill_slot < 0) {
-        spill_slot = allocator()->allocate_spill_slot(type2spill_size[spill_interval->type()] == 2);
-        from_interval->set_canonical_spill_slot(spill_slot);
-      }
-      spill_interval->assign_reg(spill_slot);
-      allocator()->append_interval(spill_interval);
-
-      TRACE_LINEAR_SCAN(4, tty->print_cr("created new Interval %d for spilling", spill_interval->reg_num()));
-
-      // insert a move from register to stack and update the mapping
-      insert_move(from_interval, spill_interval);
-      _mapping_from.at_put(spill_candidate, spill_interval);
-      unblock_registers(from_interval);
-    }
-  }
-
-  // reset to default value
-  _multiple_reads_allowed = false;
-
-  // check that all intervals have been processed
-  DEBUG_ONLY(check_empty());
+	TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: resolving mappings for Block B%d, index %d", _insert_list->block() != NULL ? _insert_list->block()->block_id() : -1, _insert_idx));
+	DEBUG_ONLY(verify_before_resolve());
+
+	// Block all registers that are used as input operands of a move.
+	// When a register is blocked, no move to this register is emitted.
+	// This is necessary for detecting cycles in moves.
+	int i;
+	for (i = _mapping_from.length() - 1; i >= 0; i--) {
+		Interval* from_interval = _mapping_from.at(i);
+		if (from_interval != NULL) {
+			block_registers(from_interval);
+		}
+	}
+
+	int spill_candidate = -1;
+	while (_mapping_from.length() > 0) {
+		bool processed_interval = false;
+
+		for (i = _mapping_from.length() - 1; i >= 0; i--) {
+			Interval* from_interval = _mapping_from.at(i);
+			Interval* to_interval = _mapping_to.at(i);
+
+			if (save_to_process_move(from_interval, to_interval)) {
+				// this interval can be processed because its target is free
+				if (from_interval != NULL) {
+					insert_move(from_interval, to_interval);
+					unblock_registers(from_interval);
+				} else {
+					insert_move(_mapping_from_opr.at(i), to_interval);
+				}
+				_mapping_from.remove_at(i);
+				_mapping_from_opr.remove_at(i);
+				_mapping_to.remove_at(i);
+
+				processed_interval = true;
+			} else if (from_interval != NULL && from_interval->assigned_reg() < LinearScan::nof_regs) {
+				// this interval cannot be processed now because target is not free
+				// it starts in a register, so it is a possible candidate for spilling
+				spill_candidate = i;
+			}
+		}
+
+		if (!processed_interval) {
+			// no move could be processed because there is a cycle in the move list
+			// (e.g. r1 -> r2, r2 -> r1), so one interval must be spilled to memory
+			assert(spill_candidate != -1, "no interval in register for spilling found");
+
+			// create a new spill interval and assign a stack slot to it
+			Interval* from_interval = _mapping_from.at(spill_candidate);
+			Interval* spill_interval = new Interval(-1);
+			spill_interval->set_type(from_interval->type());
+
+			// add a dummy range because real position is difficult to calculate
+			// Note: this range is a special case when the integrity of the allocation is checked
+			spill_interval->add_range(1, 2);
+
+			// do not allocate a new spill slot for the temporary interval, but use the
+			// spill slot assigned to from_interval. Otherwise moves from one stack slot
+			// to another can happen (not allowed by LIR_Assembler)
+			int spill_slot = from_interval->canonical_spill_slot();
+			if (spill_slot < 0) {
+				spill_slot = allocator()->allocate_spill_slot(type2spill_size[spill_interval->type()] == 2);
+				from_interval->set_canonical_spill_slot(spill_slot);
+			}
+			spill_interval->assign_reg(spill_slot);
+			allocator()->append_interval(spill_interval);
+
+			TRACE_LINEAR_SCAN(4, tty->print_cr("created new Interval %d for spilling", spill_interval->reg_num()));
+
+			// insert a move from register to stack and update the mapping
+			insert_move(from_interval, spill_interval);
+			_mapping_from.at_put(spill_candidate, spill_interval);
+			unblock_registers(from_interval);
+		}
+	}
+
+	// reset to default value
+	_multiple_reads_allowed = false;
+
+	// check that all intervals have been processed
+	DEBUG_ONLY(check_empty());
 }
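
resolve_mappings() above is a parallel-move resolver: it keeps emitting any move whose destination is no longer read by a pending move, and when only cycles remain it breaks one by saving a source value elsewhere (the code above uses a spill slot; the sketch uses an arbitrary temporary location). This is a simplified, self-contained illustration with invented names (Move, resolve_parallel_moves), not the code from this patch.

#include <cstdio>
#include <vector>

struct Move { int from; int to; };

// Emit the moves in 'pending' so that every destination ends up with the value
// its source held before the shuffle started. 'temp' is a scratch location
// used to break cycles.
static std::vector<Move> resolve_parallel_moves(std::vector<Move> pending, int temp) {
  std::vector<Move> emitted;
  while (!pending.empty()) {
    bool progress = false;
    for (int i = 0; i < (int)pending.size(); i++) {
      int dst = pending[i].to;
      bool dst_still_read = false;
      for (int j = 0; j < (int)pending.size(); j++) {
        if (j != i && pending[j].from == dst) { dst_still_read = true; break; }
      }
      if (!dst_still_read) {
        emitted.push_back(pending[i]);           // safe: no pending move reads dst
        pending.erase(pending.begin() + i);
        progress = true;
        break;
      }
    }
    if (!progress) {
      // only cycles remain (e.g. r1 -> r2, r2 -> r1): save one source to temp
      // and redirect every move that reads it, then continue normally
      int cycled_src = pending[0].from;
      emitted.push_back(Move{cycled_src, temp});
      for (int j = 0; j < (int)pending.size(); j++) {
        if (pending[j].from == cycled_src) pending[j].from = temp;
      }
    }
  }
  return emitted;
}

int main() {
  // swap r1 and r2, plus an independent move r3 -> r4
  std::vector<Move> order = resolve_parallel_moves({{1, 2}, {2, 1}, {3, 4}}, /*temp*/ 99);
  for (int k = 0; k < (int)order.size(); k++) {
    std::printf("%d -> %d\n", order[k].from, order[k].to);
  }
  return 0;
}

On the swap plus the independent move, the sketch emits 3 -> 4, 1 -> 99, 2 -> 1, 99 -> 2, which is exactly the shape of the spill-based cycle break in the function above.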
 
 
 void MoveResolver::set_insert_position(LIR_List* insert_list, int insert_idx) {
-  TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: setting insert position to Block B%d, index %d", insert_list->block() != NULL ? insert_list->block()->block_id() : -1, insert_idx));
-  assert(_insert_list == NULL && _insert_idx == -1, "use move_insert_position instead of set_insert_position when data already set");
-
-  create_insertion_buffer(insert_list);
-  _insert_list = insert_list;
-  _insert_idx = insert_idx;
+	TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: setting insert position to Block B%d, index %d", insert_list->block() != NULL ? insert_list->block()->block_id() : -1, insert_idx));
+	assert(_insert_list == NULL && _insert_idx == -1, "use move_insert_position instead of set_insert_position when data already set");
+
+	create_insertion_buffer(insert_list);
+	_insert_list = insert_list;
+	_insert_idx = insert_idx;
 }
 
 void MoveResolver::move_insert_position(LIR_List* insert_list, int insert_idx) {
-  TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: moving insert position to Block B%d, index %d", insert_list->block() != NULL ? insert_list->block()->block_id() : -1, insert_idx));
-
-  if (_insert_list != NULL && (insert_list != _insert_list || insert_idx != _insert_idx)) {
-    // insert position changed -> resolve current mappings
-    resolve_mappings();
-  }
-
-  if (insert_list != _insert_list) {
-    // block changed -> append insertion_buffer because it is
-    // bound to a specific block and create a new insertion_buffer
-    append_insertion_buffer();
-    create_insertion_buffer(insert_list);
-  }
-
-  _insert_list = insert_list;
-  _insert_idx = insert_idx;
+	TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: moving insert position to Block B%d, index %d", insert_list->block() != NULL ? insert_list->block()->block_id() : -1, insert_idx));
+
+	if (_insert_list != NULL && (insert_list != _insert_list || insert_idx != _insert_idx)) {
+		// insert position changed -> resolve current mappings
+		resolve_mappings();
+	}
+
+	if (insert_list != _insert_list) {
+		// block changed -> append insertion_buffer because it is
+		// bound to a specific block and create a new insertion_buffer
+		append_insertion_buffer();
+		create_insertion_buffer(insert_list);
+	}
+
+	_insert_list = insert_list;
+	_insert_idx = insert_idx;
 }
 
 void MoveResolver::add_mapping(Interval* from_interval, Interval* to_interval) {
-  TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: adding mapping from %d (%d, %d) to %d (%d, %d)", from_interval->reg_num(), from_interval->assigned_reg(), from_interval->assigned_regHi(), to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
-
-  _mapping_from.append(from_interval);
-  _mapping_from_opr.append(LIR_OprFact::illegalOpr);
-  _mapping_to.append(to_interval);
+	TRACE_LINEAR_SCAN(4, tty->print_cr("MoveResolver: adding mapping from %d (%d, %d) to %d (%d, %d)", from_interval->reg_num(), from_interval->assigned_reg(), from_interval->assigned_regHi(), to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
+
+	_mapping_from.append(from_interval);
+	_mapping_from_opr.append(LIR_OprFact::illegalOpr);
+	_mapping_to.append(to_interval);
 }
 
 
 void MoveResolver::add_mapping(LIR_Opr from_opr, Interval* to_interval) {
-  TRACE_LINEAR_SCAN(4, tty->print("MoveResolver: adding mapping from "); from_opr->print(); tty->print_cr(" to %d (%d, %d)", to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
-  assert(from_opr->is_constant(), "only for constants");
-
-  _mapping_from.append(NULL);
-  _mapping_from_opr.append(from_opr);
-  _mapping_to.append(to_interval);
+	TRACE_LINEAR_SCAN(4, tty->print("MoveResolver: adding mapping from "); from_opr->print(); tty->print_cr(" to %d (%d, %d)", to_interval->reg_num(), to_interval->assigned_reg(), to_interval->assigned_regHi()));
+	assert(from_opr->is_constant(), "only for constants");
+
+	_mapping_from.append(NULL);
+	_mapping_from_opr.append(from_opr);
+	_mapping_to.append(to_interval);
 }
 
 void MoveResolver::resolve_and_append_moves() {
-  if (has_mappings()) {
-    resolve_mappings();
-  }
-  append_insertion_buffer();
+	if (has_mappings()) {
+		resolve_mappings();
+	}
+	append_insertion_buffer();
 }
 
 
@@ -3841,52 +3846,52 @@
 // **** Implementation of Range *************************************
 
 Range::Range(int from, int to, Range* next) :
-  _from(from),
-  _to(to),
-  _next(next)
+	_from(from),
+	_to(to),
+	_next(next)
 {
 }
 
 // initialize sentinel
 Range* Range::_end = NULL;
 void Range::initialize() {
-  _end = new Range(max_jint, max_jint, NULL);
+	_end = new Range(max_jint, max_jint, NULL);
 }
 
 int Range::intersects_at(Range* r2) const {
-  const Range* r1 = this;
-
-  assert(r1 != NULL && r2 != NULL, "null ranges not allowed");
-  assert(r1 != _end && r2 != _end, "empty ranges not allowed");
-
-  do {
-    if (r1->from() < r2->from()) {
-      if (r1->to() <= r2->from()) {
-        r1 = r1->next(); if (r1 == _end) return -1;
-      } else {
-        return r2->from();
-      }
-    } else if (r2->from() < r1->from()) {
-      if (r2->to() <= r1->from()) {
-        r2 = r2->next(); if (r2 == _end) return -1;
-      } else {
-        return r1->from();
-      }
-    } else { // r1->from() == r2->from()
-      if (r1->from() == r1->to()) {
-        r1 = r1->next(); if (r1 == _end) return -1;
-      } else if (r2->from() == r2->to()) {
-        r2 = r2->next(); if (r2 == _end) return -1;
-      } else {
-        return r1->from();
-      }
-    }
-  } while (true);
+	const Range* r1 = this;
+
+	assert(r1 != NULL && r2 != NULL, "null ranges not allowed");
+	assert(r1 != _end && r2 != _end, "empty ranges not allowed");
+
+	do {
+		if (r1->from() < r2->from()) {
+			if (r1->to() <= r2->from()) {
+				r1 = r1->next(); if (r1 == _end) return -1;
+			} else {
+				return r2->from();
+			}
+		} else if (r2->from() < r1->from()) {
+			if (r2->to() <= r1->from()) {
+				r2 = r2->next(); if (r2 == _end) return -1;
+			} else {
+				return r1->from();
+			}
+		} else { // r1->from() == r2->from()
+			if (r1->from() == r1->to()) {
+				r1 = r1->next(); if (r1 == _end) return -1;
+			} else if (r2->from() == r2->to()) {
+				r2 = r2->next(); if (r2 == _end) return -1;
+			} else {
+				return r1->from();
+			}
+		}
+	} while (true);
 }
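
intersects_at() walks two sorted, disjoint range lists in lockstep and returns the first position covered by both, or -1. The same two-pointer idea over half-open [from, to) pairs, as a simplified standalone sketch (names and container choice are illustrative, not HotSpot's):

#include <cstdio>
#include <utility>
#include <vector>

typedef std::pair<int, int> R;   // half-open range [first, second)

// First position contained in both sorted, disjoint range lists, or -1.
static int intersects_at(const std::vector<R>& a, const std::vector<R>& b) {
  size_t i = 0, j = 0;
  while (i < a.size() && j < b.size()) {
    if (a[i].second <= b[j].first) {
      i++;                                       // a[i] ends before b[j] starts
    } else if (b[j].second <= a[i].first) {
      j++;                                       // b[j] ends before a[i] starts
    } else {
      return a[i].first > b[j].first ? a[i].first : b[j].first;
    }
  }
  return -1;
}

int main() {
  std::vector<R> a, b;
  a.push_back(R(2, 6));   a.push_back(R(10, 14));
  b.push_back(R(6, 8));   b.push_back(R(12, 20));
  std::printf("%d\n", intersects_at(a, b));      // prints 12
  return 0;
}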
 
 #ifndef PRODUCT
 void Range::print(outputStream* out) const {
-  out->print("[%d, %d[ ", _from, _to);
+	out->print("[%d, %d[ ", _from, _to);
 }
 #endif
 
@@ -3897,312 +3902,312 @@
 // initialize sentinel
 Interval* Interval::_end = NULL;
 void Interval::initialize() {
-  Range::initialize();
-  _end = new Interval(-1);
+	Range::initialize();
+	_end = new Interval(-1);
 }
 
 Interval::Interval(int reg_num) :
-  _reg_num(reg_num),
-  _type(T_ILLEGAL),
-  _first(Range::end()),
-  _use_pos_and_kinds(12),
-  _current(Range::end()),
-  _next(_end),
-  _state(invalidState),
-  _assigned_reg(LinearScan::any_reg),
-  _assigned_regHi(LinearScan::any_reg),
-  _cached_to(-1),
-  _cached_opr(LIR_OprFact::illegalOpr),
-  _cached_vm_reg(VMRegImpl::Bad()),
-  _split_children(0),
-  _canonical_spill_slot(-1),
-  _insert_move_when_activated(false),
-  _register_hint(NULL),
-  _spill_state(noDefinitionFound),
-  _spill_definition_pos(-1)
+	_reg_num(reg_num),
+	_type(T_ILLEGAL),
+	_first(Range::end()),
+	_use_pos_and_kinds(12),
+	_current(Range::end()),
+	_next(_end),
+	_state(invalidState),
+	_assigned_reg(LinearScan::any_reg),
+	_assigned_regHi(LinearScan::any_reg),
+	_cached_to(-1),
+	_cached_opr(LIR_OprFact::illegalOpr),
+	_cached_vm_reg(VMRegImpl::Bad()),
+	_split_children(0),
+	_canonical_spill_slot(-1),
+	_insert_move_when_activated(false),
+	_register_hint(NULL),
+	_spill_state(noDefinitionFound),
+	_spill_definition_pos(-1)
 {
-  _split_parent = this;
-  _current_split_child = this;
+	_split_parent = this;
+	_current_split_child = this;
 }
 
 int Interval::calc_to() {
-  assert(_first != Range::end(), "interval has no range");
-
-  Range* r = _first;
-  while (r->next() != Range::end()) {
-    r = r->next();
-  }
-  return r->to();
+	assert(_first != Range::end(), "interval has no range");
+
+	Range* r = _first;
+	while (r->next() != Range::end()) {
+		r = r->next();
+	}
+	return r->to();
 }
 
 
 #ifdef ASSERT
 // consistency check of split-children
 void Interval::check_split_children() {
-  if (_split_children.length() > 0) {
-    assert(is_split_parent(), "only split parents can have children");
-
-    for (int i = 0; i < _split_children.length(); i++) {
-      Interval* i1 = _split_children.at(i);
-
-      assert(i1->split_parent() == this, "not a split child of this interval");
-      assert(i1->type() == type(), "must be equal for all split children");
-      assert(i1->canonical_spill_slot() == canonical_spill_slot(), "must be equal for all split children");
-
-      for (int j = i + 1; j < _split_children.length(); j++) {
-        Interval* i2 = _split_children.at(j);
-
-        assert(i1->reg_num() != i2->reg_num(), "same register number");
-
-        if (i1->from() < i2->from()) {
-          assert(i1->to() <= i2->from() && i1->to() < i2->to(), "intervals overlapping");
-        } else {
-          assert(i2->from() < i1->from(), "intervals start at same op_id");
-          assert(i2->to() <= i1->from() && i2->to() < i1->to(), "intervals overlapping");
-        }
-      }
-    }
-  }
+	if (_split_children.length() > 0) {
+		assert(is_split_parent(), "only split parents can have children");
+
+		for (int i = 0; i < _split_children.length(); i++) {
+			Interval* i1 = _split_children.at(i);
+
+			assert(i1->split_parent() == this, "not a split child of this interval");
+			assert(i1->type() == type(), "must be equal for all split children");
+			assert(i1->canonical_spill_slot() == canonical_spill_slot(), "must be equal for all split children");
+
+			for (int j = i + 1; j < _split_children.length(); j++) {
+				Interval* i2 = _split_children.at(j);
+
+				assert(i1->reg_num() != i2->reg_num(), "same register number");
+
+				if (i1->from() < i2->from()) {
+					assert(i1->to() <= i2->from() && i1->to() < i2->to(), "intervals overlapping");
+				} else {
+					assert(i2->from() < i1->from(), "intervals start at same op_id");
+					assert(i2->to() <= i1->from() && i2->to() < i1->to(), "intervals overlapping");
+				}
+			}
+		}
+	}
 }
 #endif // ASSERT
 
 Interval* Interval::register_hint(bool search_split_child) const {
-  if (!search_split_child) {
-    return _register_hint;
-  }
-
-  if (_register_hint != NULL) {
-    assert(_register_hint->is_split_parent(), "ony split parents are valid hint registers");
-
-    if (_register_hint->assigned_reg() >= 0 && _register_hint->assigned_reg() < LinearScan::nof_regs) {
-      return _register_hint;
-
-    } else if (_register_hint->_split_children.length() > 0) {
-      // search the first split child that has a register assigned
-      int len = _register_hint->_split_children.length();
-      for (int i = 0; i < len; i++) {
-        Interval* cur = _register_hint->_split_children.at(i);
-
-        if (cur->assigned_reg() >= 0 && cur->assigned_reg() < LinearScan::nof_regs) {
-          return cur;
-        }
-      }
-    }
-  }
-
-  // no hint interval found that has a register assigned
-  return NULL;
+	if (!search_split_child) {
+		return _register_hint;
+	}
+
+	if (_register_hint != NULL) {
+		assert(_register_hint->is_split_parent(), "only split parents are valid hint registers");
+
+		if (_register_hint->assigned_reg() >= 0 && _register_hint->assigned_reg() < LinearScan::nof_regs) {
+			return _register_hint;
+
+		} else if (_register_hint->_split_children.length() > 0) {
+			// search the first split child that has a register assigned
+			int len = _register_hint->_split_children.length();
+			for (int i = 0; i < len; i++) {
+				Interval* cur = _register_hint->_split_children.at(i);
+
+				if (cur->assigned_reg() >= 0 && cur->assigned_reg() < LinearScan::nof_regs) {
+					return cur;
+				}
+			}
+		}
+	}
+
+	// no hint interval found that has a register assigned
+	return NULL;
 }
 
 
 Interval* Interval::split_child_at_op_id(int op_id, LIR_OpVisitState::OprMode mode) {
-  assert(is_split_parent(), "can only be called for split parents");
-  assert(op_id >= 0, "invalid op_id (method can not be called for spill moves)");
-
-  Interval* result;
-  if (_split_children.length() == 0) {
-    result = this;
-  } else {
-    result = NULL;
-    int len = _split_children.length();
-
-    // in outputMode, the end of the interval (op_id == cur->to()) is not valid
-    int to_offset = (mode == LIR_OpVisitState::outputMode ? 0 : 1);
-
-    int i;
-    for (i = 0; i < len; i++) {
-      Interval* cur = _split_children.at(i);
-      if (cur->from() <= op_id && op_id < cur->to() + to_offset) {
-        if (i > 0) {
-          // exchange current split child to start of list (faster access for next call)
-          _split_children.at_put(i, _split_children.at(0));
-          _split_children.at_put(0, cur);
-        }
-
-        // interval found
-        result = cur;
-        break;
-      }
-    }
+	assert(is_split_parent(), "can only be called for split parents");
+	assert(op_id >= 0, "invalid op_id (method can not be called for spill moves)");
+
+	Interval* result;
+	if (_split_children.length() == 0) {
+		result = this;
+	} else {
+		result = NULL;
+		int len = _split_children.length();
+
+		// in outputMode, the end of the interval (op_id == cur->to()) is not valid
+		int to_offset = (mode == LIR_OpVisitState::outputMode ? 0 : 1);
+
+		int i;
+		for (i = 0; i < len; i++) {
+			Interval* cur = _split_children.at(i);
+			if (cur->from() <= op_id && op_id < cur->to() + to_offset) {
+				if (i > 0) {
+					// swap the matching split child to the front of the list (faster access on the next call)
+					_split_children.at_put(i, _split_children.at(0));
+					_split_children.at_put(0, cur);
+				}
+
+				// interval found
+				result = cur;
+				break;
+			}
+		}
 
 #ifdef ASSERT
-    for (i = 0; i < len; i++) {
-      Interval* tmp = _split_children.at(i);
-      if (tmp != result && tmp->from() <= op_id && op_id < tmp->to() + to_offset) {
-        tty->print_cr("two valid result intervals found for op_id %d: %d and %d", op_id, result->reg_num(), tmp->reg_num());
-        result->print();
-        tmp->print();
-        assert(false, "two valid result intervals found");
-      }
-    }
+		for (i = 0; i < len; i++) {
+			Interval* tmp = _split_children.at(i);
+			if (tmp != result && tmp->from() <= op_id && op_id < tmp->to() + to_offset) {
+				tty->print_cr("two valid result intervals found for op_id %d: %d and %d", op_id, result->reg_num(), tmp->reg_num());
+				result->print();
+				tmp->print();
+				assert(false, "two valid result intervals found");
+			}
+		}
 #endif
-  }
-
-  assert(result != NULL, "no matching interval found");
-  assert(result->covers(op_id, mode), "op_id not covered by interval");
-
-  return result;
+	}
+
+	assert(result != NULL, "no matching interval found");
+	assert(result->covers(op_id, mode), "op_id not covered by interval");
+
+	return result;
 }
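
Besides the linear search, split_child_at_op_id() swaps the child it finds to index 0, so the next lookup (which usually asks for the same child) succeeds on the first comparison. A tiny sketch of that move-to-front caching, with invented names:

#include <cstdio>
#include <vector>

// Linear search with move-to-front: the matching element is swapped to slot 0
// so a repeated query for the same value is found immediately.
static int find_move_to_front(std::vector<int>& v, int wanted) {
  for (size_t i = 0; i < v.size(); i++) {
    if (v[i] == wanted) {
      if (i > 0) { int tmp = v[0]; v[0] = v[i]; v[i] = tmp; }
      return 0;                    // element now lives at the front
    }
  }
  return -1;                       // not found
}

int main() {
  std::vector<int> children;
  children.push_back(7); children.push_back(3); children.push_back(9);
  find_move_to_front(children, 9);
  std::printf("%d %d %d\n", children[0], children[1], children[2]);  // prints 9 3 7
  return 0;
}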
 
 
 // returns the last split child that ends before the given op_id
 Interval* Interval::split_child_before_op_id(int op_id) {
-  assert(op_id >= 0, "invalid op_id");
-
-  Interval* parent = split_parent();
-  Interval* result = NULL;
-
-  int len = parent->_split_children.length();
-  assert(len > 0, "no split children available");
-
-  for (int i = len - 1; i >= 0; i--) {
-    Interval* cur = parent->_split_children.at(i);
-    if (cur->to() <= op_id && (result == NULL || result->to() < cur->to())) {
-      result = cur;
-    }
-  }
-
-  assert(result != NULL, "no split child found");
-  return result;
+	assert(op_id >= 0, "invalid op_id");
+
+	Interval* parent = split_parent();
+	Interval* result = NULL;
+
+	int len = parent->_split_children.length();
+	assert(len > 0, "no split children available");
+
+	for (int i = len - 1; i >= 0; i--) {
+		Interval* cur = parent->_split_children.at(i);
+		if (cur->to() <= op_id && (result == NULL || result->to() < cur->to())) {
+			result = cur;
+		}
+	}
+
+	assert(result != NULL, "no split child found");
+	return result;
 }
 
 
 // checks if op_id is covered by any split child
 bool Interval::split_child_covers(int op_id, LIR_OpVisitState::OprMode mode) {
-  assert(is_split_parent(), "can only be called for split parents");
-  assert(op_id >= 0, "invalid op_id (method can not be called for spill moves)");
-
-  if (_split_children.length() == 0) {
-    // simple case if interval was not split
-    return covers(op_id, mode);
-
-  } else {
-    // extended case: check all split children
-    int len = _split_children.length();
-    for (int i = 0; i < len; i++) {
-      Interval* cur = _split_children.at(i);
-      if (cur->covers(op_id, mode)) {
-        return true;
-      }
-    }
-    return false;
-  }
+	assert(is_split_parent(), "can only be called for split parents");
+	assert(op_id >= 0, "invalid op_id (method can not be called for spill moves)");
+
+	if (_split_children.length() == 0) {
+		// simple case if interval was not split
+		return covers(op_id, mode);
+
+	} else {
+		// extended case: check all split children
+		int len = _split_children.length();
+		for (int i = 0; i < len; i++) {
+			Interval* cur = _split_children.at(i);
+			if (cur->covers(op_id, mode)) {
+				return true;
+			}
+		}
+		return false;
+	}
 }
 
 
 // Note: use positions are sorted descending -> first use has highest index
 int Interval::first_usage(IntervalUseKind min_use_kind) const {
-  assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
-
-  for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
-    if (_use_pos_and_kinds.at(i + 1) >= min_use_kind) {
-      return _use_pos_and_kinds.at(i);
-    }
-  }
-  return max_jint;
+	assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
+
+	for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
+		if (_use_pos_and_kinds.at(i + 1) >= min_use_kind) {
+			return _use_pos_and_kinds.at(i);
+		}
+	}
+	return max_jint;
 }
 
 int Interval::next_usage(IntervalUseKind min_use_kind, int from) const {
-  assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
-
-  for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
-    if (_use_pos_and_kinds.at(i) >= from && _use_pos_and_kinds.at(i + 1) >= min_use_kind) {
-      return _use_pos_and_kinds.at(i);
-    }
-  }
-  return max_jint;
+	assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
+
+	for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
+		if (_use_pos_and_kinds.at(i) >= from && _use_pos_and_kinds.at(i + 1) >= min_use_kind) {
+			return _use_pos_and_kinds.at(i);
+		}
+	}
+	return max_jint;
 }
 
 int Interval::next_usage_exact(IntervalUseKind exact_use_kind, int from) const {
-  assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
-
-  for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
-    if (_use_pos_and_kinds.at(i) >= from && _use_pos_and_kinds.at(i + 1) == exact_use_kind) {
-      return _use_pos_and_kinds.at(i);
-    }
-  }
-  return max_jint;
+	assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
+
+	for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
+		if (_use_pos_and_kinds.at(i) >= from && _use_pos_and_kinds.at(i + 1) == exact_use_kind) {
+			return _use_pos_and_kinds.at(i);
+		}
+	}
+	return max_jint;
 }
 
 int Interval::previous_usage(IntervalUseKind min_use_kind, int from) const {
-  assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
-
-  int prev = 0;
-  for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
-    if (_use_pos_and_kinds.at(i) > from) {
-      return prev;
-    }
-    if (_use_pos_and_kinds.at(i + 1) >= min_use_kind) {
-      prev = _use_pos_and_kinds.at(i);
-    }
-  }
-  return prev;
+	assert(LinearScan::is_virtual_interval(this), "cannot access use positions for fixed intervals");
+
+	int prev = 0;
+	for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
+		if (_use_pos_and_kinds.at(i) > from) {
+			return prev;
+		}
+		if (_use_pos_and_kinds.at(i + 1) >= min_use_kind) {
+			prev = _use_pos_and_kinds.at(i);
+		}
+	}
+	return prev;
 }
 
 void Interval::add_use_pos(int pos, IntervalUseKind use_kind) {
-  assert(covers(pos, LIR_OpVisitState::inputMode), "use position not covered by live range");
-
-  // do not add use positions for precolored intervals because
-  // they are never used
-  if (use_kind != noUse && reg_num() >= LIR_OprDesc::vreg_base) {
+	assert(covers(pos, LIR_OpVisitState::inputMode), "use position not covered by live range");
+
+	// do not add use positions for precolored intervals because
+	// they are never used
+	if (use_kind != noUse && reg_num() >= LIR_OprDesc::vreg_base) {
 #ifdef ASSERT
-    assert(_use_pos_and_kinds.length() % 2 == 0, "must be");
-    for (int i = 0; i < _use_pos_and_kinds.length(); i += 2) {
-      assert(pos <= _use_pos_and_kinds.at(i), "already added a use-position with lower position");
-      assert(_use_pos_and_kinds.at(i + 1) >= firstValidKind && _use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
-      if (i > 0) {
-        assert(_use_pos_and_kinds.at(i) < _use_pos_and_kinds.at(i - 2), "not sorted descending");
-      }
-    }
+		assert(_use_pos_and_kinds.length() % 2 == 0, "must be");
+		for (int i = 0; i < _use_pos_and_kinds.length(); i += 2) {
+			assert(pos <= _use_pos_and_kinds.at(i), "already added a use-position with lower position");
+			assert(_use_pos_and_kinds.at(i + 1) >= firstValidKind && _use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
+			if (i > 0) {
+				assert(_use_pos_and_kinds.at(i) < _use_pos_and_kinds.at(i - 2), "not sorted descending");
+			}
+		}
 #endif
 
-    // Note: add_use is called in descending order, so list gets sorted
-    //       automatically by just appending new use positions
-    int len = _use_pos_and_kinds.length();
-    if (len == 0 || _use_pos_and_kinds.at(len - 2) > pos) {
-      _use_pos_and_kinds.append(pos);
-      _use_pos_and_kinds.append(use_kind);
-    } else if (_use_pos_and_kinds.at(len - 1) < use_kind) {
-      assert(_use_pos_and_kinds.at(len - 2) == pos, "list not sorted correctly");
-      _use_pos_and_kinds.at_put(len - 1, use_kind);
-    }
-  }
+		// Note: add_use_pos is called with descending positions, so the list stays
+		//       sorted automatically by just appending new use positions
+		int len = _use_pos_and_kinds.length();
+		if (len == 0 || _use_pos_and_kinds.at(len - 2) > pos) {
+			_use_pos_and_kinds.append(pos);
+			_use_pos_and_kinds.append(use_kind);
+		} else if (_use_pos_and_kinds.at(len - 1) < use_kind) {
+			assert(_use_pos_and_kinds.at(len - 2) == pos, "list not sorted correctly");
+			_use_pos_and_kinds.at_put(len - 1, use_kind);
+		}
+	}
 }
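
Use positions are stored as a flat array of (position, kind) pairs in descending position order, so the earliest qualifying use is found by scanning from the highest index. A minimal sketch of that layout and of a first_usage-style query (UseList is an invented name and the kind values are arbitrary):

#include <climits>
#include <cstdio>
#include <vector>

// Flat (position, kind) pairs appended with strictly descending positions,
// so the highest index holds the earliest use.
struct UseList {
  std::vector<int> pos_and_kind;                 // pos0, kind0, pos1, kind1, ...

  void add(int pos, int kind) {
    pos_and_kind.push_back(pos);
    pos_and_kind.push_back(kind);
  }

  // earliest position whose kind is at least min_kind, or INT_MAX if none
  int first_usage(int min_kind) const {
    for (int i = (int)pos_and_kind.size() - 2; i >= 0; i -= 2) {
      if (pos_and_kind[i + 1] >= min_kind) return pos_and_kind[i];
    }
    return INT_MAX;
  }
};

int main() {
  UseList u;
  u.add(40, 2);                                  // added in descending position order
  u.add(20, 1);
  u.add(10, 1);
  std::printf("%d %d\n", u.first_usage(1), u.first_usage(2));   // prints "10 40"
  return 0;
}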
 
 void Interval::add_range(int from, int to) {
-  assert(from < to, "invalid range");
-  assert(first() == Range::end() || to < first()->next()->from(), "not inserting at begin of interval");
-  assert(from <= first()->to(), "not inserting at begin of interval");
-
-  if (first()->from() <= to) {
-    // join intersecting ranges
-    first()->set_from(MIN2(from, first()->from()));
-    first()->set_to  (MAX2(to,   first()->to()));
-  } else {
-    // insert new range
-    _first = new Range(from, to, first());
-  }
+	assert(from < to, "invalid range");
+	assert(first() == Range::end() || to < first()->next()->from(), "not inserting at begin of interval");
+	assert(from <= first()->to(), "not inserting at begin of interval");
+
+	if (first()->from() <= to) {
+		// join intersecting ranges
+		first()->set_from(MIN2(from, first()->from()));
+		first()->set_to  (MAX2(to,   first()->to()));
+	} else {
+		// insert new range
+		_first = new Range(from, to, first());
+	}
 }
 
 Interval* Interval::new_split_child() {
-  // allocate new interval
-  Interval* result = new Interval(-1);
-  result->set_type(type());
-
-  Interval* parent = split_parent();
-  result->_split_parent = parent;
-  result->set_register_hint(parent);
-
-  // insert new interval in children-list of parent
-  if (parent->_split_children.length() == 0) {
-    assert(is_split_parent(), "list must be initialized at first split");
-
-    parent->_split_children = IntervalList(4);
-    parent->_split_children.append(this);
-  }
-  parent->_split_children.append(result);
-
-  return result;
+	// allocate new interval
+	Interval* result = new Interval(-1);
+	result->set_type(type());
+
+	Interval* parent = split_parent();
+	result->_split_parent = parent;
+	result->set_register_hint(parent);
+
+	// insert new interval in children-list of parent
+	if (parent->_split_children.length() == 0) {
+		assert(is_split_parent(), "list must be initialized at first split");
+
+		parent->_split_children = IntervalList(4);
+		parent->_split_children.append(this);
+	}
+	parent->_split_children.append(result);
+
+	return result;
 }
 
 // split this interval at the specified position and return
@@ -4216,66 +4221,66 @@
 //
 // Note: The new interval has no valid reg_num
 Interval* Interval::split(int split_pos) {
-  assert(LinearScan::is_virtual_interval(this), "cannot split fixed intervals");
-
-  // allocate new interval
-  Interval* result = new_split_child();
-
-  // split the ranges
-  Range* prev = NULL;
-  Range* cur = _first;
-  while (cur != Range::end() && cur->to() <= split_pos) {
-    prev = cur;
-    cur = cur->next();
-  }
-  assert(cur != Range::end(), "split interval after end of last range");
-
-  if (cur->from() < split_pos) {
-    result->_first = new Range(split_pos, cur->to(), cur->next());
-    cur->set_to(split_pos);
-    cur->set_next(Range::end());
-
-  } else {
-    assert(prev != NULL, "split before start of first range");
-    result->_first = cur;
-    prev->set_next(Range::end());
-  }
-  result->_current = result->_first;
-  _cached_to = -1; // clear cached value
-
-  // split list of use positions
-  int total_len = _use_pos_and_kinds.length();
-  int start_idx = total_len - 2;
-  while (start_idx >= 0 && _use_pos_and_kinds.at(start_idx) < split_pos) {
-    start_idx -= 2;
-  }
-
-  intStack new_use_pos_and_kinds(total_len - start_idx);
-  int i;
-  for (i = start_idx + 2; i < total_len; i++) {
-    new_use_pos_and_kinds.append(_use_pos_and_kinds.at(i));
-  }
-
-  _use_pos_and_kinds.truncate(start_idx + 2);
-  result->_use_pos_and_kinds = _use_pos_and_kinds;
-  _use_pos_and_kinds = new_use_pos_and_kinds;
+	assert(LinearScan::is_virtual_interval(this), "cannot split fixed intervals");
+
+	// allocate new interval
+	Interval* result = new_split_child();
+
+	// split the ranges
+	Range* prev = NULL;
+	Range* cur = _first;
+	while (cur != Range::end() && cur->to() <= split_pos) {
+		prev = cur;
+		cur = cur->next();
+	}
+	assert(cur != Range::end(), "split interval after end of last range");
+
+	if (cur->from() < split_pos) {
+		result->_first = new Range(split_pos, cur->to(), cur->next());
+		cur->set_to(split_pos);
+		cur->set_next(Range::end());
+
+	} else {
+		assert(prev != NULL, "split before start of first range");
+		result->_first = cur;
+		prev->set_next(Range::end());
+	}
+	result->_current = result->_first;
+	_cached_to = -1; // clear cached value
+
+	// split list of use positions
+	int total_len = _use_pos_and_kinds.length();
+	int start_idx = total_len - 2;
+	while (start_idx >= 0 && _use_pos_and_kinds.at(start_idx) < split_pos) {
+		start_idx -= 2;
+	}
+
+	intStack new_use_pos_and_kinds(total_len - start_idx);
+	int i;
+	for (i = start_idx + 2; i < total_len; i++) {
+		new_use_pos_and_kinds.append(_use_pos_and_kinds.at(i));
+	}
+
+	_use_pos_and_kinds.truncate(start_idx + 2);
+	result->_use_pos_and_kinds = _use_pos_and_kinds;
+	_use_pos_and_kinds = new_use_pos_and_kinds;
 
 #ifdef ASSERT
-  assert(_use_pos_and_kinds.length() % 2 == 0, "must have use kind for each use pos");
-  assert(result->_use_pos_and_kinds.length() % 2 == 0, "must have use kind for each use pos");
-  assert(_use_pos_and_kinds.length() + result->_use_pos_and_kinds.length() == total_len, "missed some entries");
-
-  for (i = 0; i < _use_pos_and_kinds.length(); i += 2) {
-    assert(_use_pos_and_kinds.at(i) < split_pos, "must be");
-    assert(_use_pos_and_kinds.at(i + 1) >= firstValidKind && _use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
-  }
-  for (i = 0; i < result->_use_pos_and_kinds.length(); i += 2) {
-    assert(result->_use_pos_and_kinds.at(i) >= split_pos, "must be");
-    assert(result->_use_pos_and_kinds.at(i + 1) >= firstValidKind && result->_use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
-  }
+	assert(_use_pos_and_kinds.length() % 2 == 0, "must have use kind for each use pos");
+	assert(result->_use_pos_and_kinds.length() % 2 == 0, "must have use kind for each use pos");
+	assert(_use_pos_and_kinds.length() + result->_use_pos_and_kinds.length() == total_len, "missed some entries");
+
+	for (i = 0; i < _use_pos_and_kinds.length(); i += 2) {
+		assert(_use_pos_and_kinds.at(i) < split_pos, "must be");
+		assert(_use_pos_and_kinds.at(i + 1) >= firstValidKind && _use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
+	}
+	for (i = 0; i < result->_use_pos_and_kinds.length(); i += 2) {
+		assert(result->_use_pos_and_kinds.at(i) >= split_pos, "must be");
+		assert(result->_use_pos_and_kinds.at(i + 1) >= firstValidKind && result->_use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
+	}
 #endif
 
-  return result;
+	return result;
 }
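
split() divides the sorted range list at split_pos: ranges below the position stay with this interval, the range containing it is truncated, and the rest moves to the new child; the use-position array is divided the same way. A compact sketch of just the range split, over a vector of half-open ranges with invented names:

#include <cstdio>
#include <vector>

struct Rng { int from; int to; };          // half-open range [from, to)

// Split a sorted, disjoint range list at 'pos'; 'low' keeps everything below
// pos, the returned vector receives everything at or above it.
static std::vector<Rng> split_ranges(std::vector<Rng>& low, int pos) {
  std::vector<Rng> high;
  std::vector<Rng> keep;
  for (size_t i = 0; i < low.size(); i++) {
    Rng r = low[i];
    if (r.to <= pos) {
      keep.push_back(r);                   // entirely below the split position
    } else if (r.from >= pos) {
      high.push_back(r);                   // entirely above: moves to the new child
    } else {
      keep.push_back(Rng{r.from, pos});    // straddles the position: cut it in two
      high.push_back(Rng{pos, r.to});
    }
  }
  low = keep;
  return high;
}

int main() {
  std::vector<Rng> low;
  low.push_back(Rng{2, 8});
  low.push_back(Rng{12, 20});
  std::vector<Rng> high = split_ranges(low, 14);
  std::printf("low: [%d,%d) [%d,%d)  high: [%d,%d)\n",
              low[0].from, low[0].to, low[1].from, low[1].to,
              high[0].from, high[0].to);   // low: [2,8) [12,14)  high: [14,20)
  return 0;
}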
 
 // split this interval at the specified position and return
@@ -4284,135 +4289,135 @@
 // Currently, only the first range can be split, and the new interval
 // must not have split positions
 Interval* Interval::split_from_start(int split_pos) {
-  assert(LinearScan::is_virtual_interval(this), "cannot split fixed intervals");
-  assert(split_pos > from() && split_pos < to(), "can only split inside interval");
-  assert(split_pos > _first->from() && split_pos <= _first->to(), "can only split inside first range");
-  assert(first_usage(noUse) > split_pos, "can not split when use positions are present");
-
-  // allocate new interval
-  Interval* result = new_split_child();
-
-  // the new created interval has only one range (checked by assertion above),
-  // so the splitting of the ranges is very simple
-  result->add_range(_first->from(), split_pos);
-
-  if (split_pos == _first->to()) {
-    assert(_first->next() != Range::end(), "must not be at end");
-    _first = _first->next();
-  } else {
-    _first->set_from(split_pos);
-  }
-
-  return result;
+	assert(LinearScan::is_virtual_interval(this), "cannot split fixed intervals");
+	assert(split_pos > from() && split_pos < to(), "can only split inside interval");
+	assert(split_pos > _first->from() && split_pos <= _first->to(), "can only split inside first range");
+	assert(first_usage(noUse) > split_pos, "can not split when use positions are present");
+
+	// allocate new interval
+	Interval* result = new_split_child();
+
+	// the newly created interval has only one range (checked by the assertions above),
+	// so splitting the ranges is very simple
+	result->add_range(_first->from(), split_pos);
+
+	if (split_pos == _first->to()) {
+		assert(_first->next() != Range::end(), "must not be at end");
+		_first = _first->next();
+	} else {
+		_first->set_from(split_pos);
+	}
+
+	return result;
 }
 
 
 // returns true if the op_id is inside the interval
 bool Interval::covers(int op_id, LIR_OpVisitState::OprMode mode) const {
-  Range* cur  = _first;
-
-  while (cur != Range::end() && cur->to() < op_id) {
-    cur = cur->next();
-  }
-  if (cur != Range::end()) {
-    assert(cur->to() != cur->next()->from(), "ranges not separated");
-
-    if (mode == LIR_OpVisitState::outputMode) {
-      return cur->from() <= op_id && op_id < cur->to();
-    } else {
-      return cur->from() <= op_id && op_id <= cur->to();
-    }
-  }
-  return false;
+	Range* cur  = _first;
+
+	while (cur != Range::end() && cur->to() < op_id) {
+		cur = cur->next();
+	}
+	if (cur != Range::end()) {
+		assert(cur->to() != cur->next()->from(), "ranges not separated");
+
+		if (mode == LIR_OpVisitState::outputMode) {
+			return cur->from() <= op_id && op_id < cur->to();
+		} else {
+			return cur->from() <= op_id && op_id <= cur->to();
+		}
+	}
+	return false;
 }
 
 // returns true if the interval has any hole between hole_from and hole_to
 // (even if the hole has only the length 1)
 bool Interval::has_hole_between(int hole_from, int hole_to) {
-  assert(hole_from < hole_to, "check");
-  assert(from() <= hole_from && hole_to <= to(), "index out of interval");
-
-  Range* cur  = _first;
-  while (cur != Range::end()) {
-    assert(cur->to() < cur->next()->from(), "no space between ranges");
-
-    // hole-range starts before this range -> hole
-    if (hole_from < cur->from()) {
-      return true;
-
-    // hole-range completely inside this range -> no hole
-    } else if (hole_to <= cur->to()) {
-      return false;
-
-    // overlapping of hole-range with this range -> hole
-    } else if (hole_from <= cur->to()) {
-      return true;
-    }
-
-    cur = cur->next();
-  }
-
-  return false;
+	assert(hole_from < hole_to, "check");
+	assert(from() <= hole_from && hole_to <= to(), "index out of interval");
+
+	Range* cur  = _first;
+	while (cur != Range::end()) {
+		assert(cur->to() < cur->next()->from(), "no space between ranges");
+
+		// hole-range starts before this range -> hole
+		if (hole_from < cur->from()) {
+			return true;
+
+			// hole-range completely inside this range -> no hole
+		} else if (hole_to <= cur->to()) {
+			return false;
+
+			// overlapping of hole-range with this range -> hole
+		} else if (hole_from <= cur->to()) {
+			return true;
+		}
+
+		cur = cur->next();
+	}
+
+	return false;
 }
 
 
 #ifndef PRODUCT
 void Interval::print(outputStream* out) const {
-  const char* SpillState2Name[] = { "no definition", "no spill store", "one spill store", "store at definition", "start in memory", "no optimization" };
-  const char* UseKind2Name[] = { "N", "L", "S", "M" };
-
-  const char* type_name;
-  LIR_Opr opr = LIR_OprFact::illegal();
-  if (reg_num() < LIR_OprDesc::vreg_base) {
-    type_name = "fixed";
-    // need a temporary operand for fixed intervals because type() cannot be called
-    if (assigned_reg() >= pd_first_cpu_reg && assigned_reg() <= pd_last_cpu_reg) {
-      opr = LIR_OprFact::single_cpu(assigned_reg());
-    } else if (assigned_reg() >= pd_first_fpu_reg && assigned_reg() <= pd_last_fpu_reg) {
-      opr = LIR_OprFact::single_fpu(assigned_reg() - pd_first_fpu_reg);
+	const char* SpillState2Name[] = { "no definition", "no spill store", "one spill store", "store at definition", "start in memory", "no optimization" };
+	const char* UseKind2Name[] = { "N", "L", "S", "M" };
+
+	const char* type_name;
+	LIR_Opr opr = LIR_OprFact::illegal();
+	if (reg_num() < LIR_OprDesc::vreg_base) {
+		type_name = "fixed";
+		// need a temporary operand for fixed intervals because type() cannot be called
+		if (assigned_reg() >= pd_first_cpu_reg && assigned_reg() <= pd_last_cpu_reg) {
+			opr = LIR_OprFact::single_cpu(assigned_reg());
+		} else if (assigned_reg() >= pd_first_fpu_reg && assigned_reg() <= pd_last_fpu_reg) {
+			opr = LIR_OprFact::single_fpu(assigned_reg() - pd_first_fpu_reg);
 #ifdef X86
-    } else if (assigned_reg() >= pd_first_xmm_reg && assigned_reg() <= pd_last_xmm_reg) {
-      opr = LIR_OprFact::single_xmm(assigned_reg() - pd_first_xmm_reg);
+		} else if (assigned_reg() >= pd_first_xmm_reg && assigned_reg() <= pd_last_xmm_reg) {
+			opr = LIR_OprFact::single_xmm(assigned_reg() - pd_first_xmm_reg);
 #endif
-    } else {
-      ShouldNotReachHere();
-    }
-  } else {
-    type_name = type2name(type());
-    if (assigned_reg() != -1) {
-      opr = LinearScan::calc_operand_for_interval(this);
-    }
-  }
-
-  out->print("%d %s ", reg_num(), type_name);
-  if (opr->is_valid()) {
-    out->print("\"");
-    opr->print(out);
-    out->print("\" ");
-  }
-  out->print("%d %d ", split_parent()->reg_num(), (register_hint(false) != NULL ? register_hint(false)->reg_num() : -1));
-
-  // print ranges
-  Range* cur = _first;
-  while (cur != Range::end()) {
-    cur->print(out);
-    cur = cur->next();
-    assert(cur != NULL, "range list not closed with range sentinel");
-  }
-
-  // print use positions
-  int prev = 0;
-  assert(_use_pos_and_kinds.length() % 2 == 0, "must be");
-  for (int i =_use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
-    assert(_use_pos_and_kinds.at(i + 1) >= firstValidKind && _use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
-    assert(prev < _use_pos_and_kinds.at(i), "use positions not sorted");
-
-    out->print("%d %s ", _use_pos_and_kinds.at(i), UseKind2Name[_use_pos_and_kinds.at(i + 1)]);
-    prev = _use_pos_and_kinds.at(i);
-  }
-
-  out->print(" \"%s\"", SpillState2Name[spill_state()]);
-  out->cr();
+		} else {
+			ShouldNotReachHere();
+		}
+	} else {
+		type_name = type2name(type());
+		if (assigned_reg() != -1) {
+			opr = LinearScan::calc_operand_for_interval(this);
+		}
+	}
+
+	out->print("%d %s ", reg_num(), type_name);
+	if (opr->is_valid()) {
+		out->print("\"");
+		opr->print(out);
+		out->print("\" ");
+	}
+	out->print("%d %d ", split_parent()->reg_num(), (register_hint(false) != NULL ? register_hint(false)->reg_num() : -1));
+
+	// print ranges
+	Range* cur = _first;
+	while (cur != Range::end()) {
+		cur->print(out);
+		cur = cur->next();
+		assert(cur != NULL, "range list not closed with range sentinel");
+	}
+
+	// print use positions
+	int prev = 0;
+	assert(_use_pos_and_kinds.length() % 2 == 0, "must be");
+	for (int i = _use_pos_and_kinds.length() - 2; i >= 0; i -= 2) {
+		assert(_use_pos_and_kinds.at(i + 1) >= firstValidKind && _use_pos_and_kinds.at(i + 1) <= lastValidKind, "invalid use kind");
+		assert(prev < _use_pos_and_kinds.at(i), "use positions not sorted");
+
+		out->print("%d %s ", _use_pos_and_kinds.at(i), UseKind2Name[_use_pos_and_kinds.at(i + 1)]);
+		prev = _use_pos_and_kinds.at(i);
+	}
+
+	out->print(" \"%s\"", SpillState2Name[spill_state()]);
+	out->cr();
 }
 #endif
 
@@ -4420,213 +4425,213 @@
 
 // **** Implementation of IntervalWalker ****************************
 
-IntervalWalker::IntervalWalker(LinearScan* allocator, Interval* unhandled_fixed_first, Interval* unhandled_any_first)
- : _compilation(allocator->compilation())
- , _allocator(allocator)
+IntervalWalker::IntervalWalker(LinearScan* allocator, Interval* unhandled_fixed_first, Interval* unhandled_any_first)
+	: _compilation(allocator->compilation())
+	, _allocator(allocator)
 {
-  _unhandled_first[fixedKind] = unhandled_fixed_first;
-  _unhandled_first[anyKind]   = unhandled_any_first;
-  _active_first[fixedKind]    = Interval::end();
-  _inactive_first[fixedKind]  = Interval::end();
-  _active_first[anyKind]      = Interval::end();
-  _inactive_first[anyKind]    = Interval::end();
-  _current_position = -1;
-  _current = NULL;
-  next_interval();
+	_unhandled_first[fixedKind] = unhandled_fixed_first;
+	_unhandled_first[anyKind]   = unhandled_any_first;
+	_active_first[fixedKind]    = Interval::end();
+	_inactive_first[fixedKind]  = Interval::end();
+	_active_first[anyKind]      = Interval::end();
+	_inactive_first[anyKind]    = Interval::end();
+	_current_position = -1;
+	_current = NULL;
+	next_interval();
 }
 
 
 // append interval at top of list
 void IntervalWalker::append_unsorted(Interval** list, Interval* interval) {
-  interval->set_next(*list); *list = interval;
+	interval->set_next(*list); *list = interval;
 }
 
 
 // append interval in order of current range from()
 void IntervalWalker::append_sorted(Interval** list, Interval* interval) {
-  Interval* prev = NULL;
-  Interval* cur  = *list;
-  while (cur->current_from() < interval->current_from()) {
-    prev = cur; cur = cur->next();
-  }
-  if (prev == NULL) {
-    *list = interval;
-  } else {
-    prev->set_next(interval);
-  }
-  interval->set_next(cur);
+	Interval* prev = NULL;
+	Interval* cur  = *list;
+	while (cur->current_from() < interval->current_from()) {
+		prev = cur; cur = cur->next();
+	}
+	if (prev == NULL) {
+		*list = interval;
+	} else {
+		prev->set_next(interval);
+	}
+	interval->set_next(cur);
 }
 
 void IntervalWalker::append_to_unhandled(Interval** list, Interval* interval) {
-  assert(interval->from() >= current()->current_from(), "cannot append new interval before current walk position");
-
-  Interval* prev = NULL;
-  Interval* cur  = *list;
-  while (cur->from() < interval->from() || (cur->from() == interval->from() && cur->first_usage(noUse) < interval->first_usage(noUse))) {
-    prev = cur; cur = cur->next();
-  }
-  if (prev == NULL) {
-    *list = interval;
-  } else {
-    prev->set_next(interval);
-  }
-  interval->set_next(cur);
+	assert(interval->from() >= current()->current_from(), "cannot append new interval before current walk position");
+
+	Interval* prev = NULL;
+	Interval* cur  = *list;
+	while (cur->from() < interval->from() || (cur->from() == interval->from() && cur->first_usage(noUse) < interval->first_usage(noUse))) {
+		prev = cur; cur = cur->next();
+	}
+	if (prev == NULL) {
+		*list = interval;
+	} else {
+		prev->set_next(interval);
+	}
+	interval->set_next(cur);
 }
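
append_sorted() and append_to_unhandled() above both perform an ordered insert into an intrusive singly-linked interval list (terminated by a sentinel and keyed by current_from(), or by from() and first_usage()). A bare-bones sketch of the same pattern with a plain node type and a NULL terminator instead of the sentinel:

#include <cstdio>

struct Node { int key; Node* next; };

// Insert 'n' in front of the first element whose key is not smaller,
// keeping the list sorted ascending. Returns the (possibly new) head.
static Node* insert_sorted(Node* head, Node* n) {
  Node* prev = 0;
  Node* cur = head;
  while (cur != 0 && cur->key < n->key) {
    prev = cur;
    cur = cur->next;
  }
  n->next = cur;
  if (prev == 0) return n;      // new smallest element becomes the head
  prev->next = n;
  return head;
}

int main() {
  Node a = {5, 0}, b = {1, 0}, c = {3, 0};
  Node* head = 0;
  head = insert_sorted(head, &a);
  head = insert_sorted(head, &b);
  head = insert_sorted(head, &c);
  for (Node* p = head; p != 0; p = p->next) std::printf("%d ", p->key);  // 1 3 5
  std::printf("\n");
  return 0;
}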
 
 
 inline bool IntervalWalker::remove_from_list(Interval** list, Interval* i) {
-  while (*list != Interval::end() && *list != i) {
-    list = (*list)->next_addr();
-  }
-  if (*list != Interval::end()) {
-    assert(*list == i, "check");
-    *list = (*list)->next();
-    return true;
-  } else {
-    return false;
-  }
+	while (*list != Interval::end() && *list != i) {
+		list = (*list)->next_addr();
+	}
+	if (*list != Interval::end()) {
+		assert(*list == i, "check");
+		*list = (*list)->next();
+		return true;
+	} else {
+		return false;
+	}
 }
 
 void IntervalWalker::remove_from_list(Interval* i) {
-  bool deleted;
-
-  if (i->state() == activeState) {
-    deleted = remove_from_list(active_first_addr(anyKind), i);
-  } else {
-    assert(i->state() == inactiveState, "invalid state");
-    deleted = remove_from_list(inactive_first_addr(anyKind), i);
-  }
-
-  assert(deleted, "interval has not been found in list");
+	bool deleted;
+
+	if (i->state() == activeState) {
+		deleted = remove_from_list(active_first_addr(anyKind), i);
+	} else {
+		assert(i->state() == inactiveState, "invalid state");
+		deleted = remove_from_list(inactive_first_addr(anyKind), i);
+	}
+
+	assert(deleted, "interval has not been found in list");
 }
 
 
 void IntervalWalker::walk_to(IntervalState state, int from) {
-  assert (state == activeState || state == inactiveState, "wrong state");
-  for_each_interval_kind(kind) {
-    Interval** prev = state == activeState ? active_first_addr(kind) : inactive_first_addr(kind);
-    Interval* next   = *prev;
-    while (next->current_from() <= from) {
-      Interval* cur = next;
-      next = cur->next();
-
-      bool range_has_changed = false;
-      while (cur->current_to() <= from) {
-        cur->next_range();
-        range_has_changed = true;
-      }
-
-      // also handle move from inactive list to active list
-      range_has_changed = range_has_changed || (state == inactiveState && cur->current_from() <= from);
-
-      if (range_has_changed) {
-        // remove cur from list
-        *prev = next;
-        if (cur->current_at_end()) {
-          // move to handled state (not maintained as a list)
-          cur->set_state(handledState);
-          interval_moved(cur, kind, state, handledState);
-        } else if (cur->current_from() <= from){
-          // sort into active list
-          append_sorted(active_first_addr(kind), cur);
-          cur->set_state(activeState);
-          if (*prev == cur) {
-            assert(state == activeState, "check");
-            prev = cur->next_addr();
-          }
-          interval_moved(cur, kind, state, activeState);
-        } else {
-          // sort into inactive list
-          append_sorted(inactive_first_addr(kind), cur);
-          cur->set_state(inactiveState);
-          if (*prev == cur) {
-            assert(state == inactiveState, "check");
-            prev = cur->next_addr();
-          }
-          interval_moved(cur, kind, state, inactiveState);
-        }
-      } else {
-        prev = cur->next_addr();
-        continue;
-      }
-    }
-  }
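+	// advance every interval on the given list (active or inactive, for each interval kind)
+	// to position 'from': intervals whose last range has ended become handled, the others are
+	// re-sorted into the active or inactive list depending on whether 'from' lies inside a range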
+	assert (state == activeState || state == inactiveState, "wrong state");
+	for_each_interval_kind(kind) {
+		Interval** prev = state == activeState ? active_first_addr(kind) : inactive_first_addr(kind);
+		Interval* next   = *prev;
+		while (next->current_from() <= from) {
+			Interval* cur = next;
+			next = cur->next();
+
+			bool range_has_changed = false;
+			while (cur->current_to() <= from) {
+				cur->next_range();
+				range_has_changed = true;
+			}
+
+			// also handle move from inactive list to active list
+			range_has_changed = range_has_changed || (state == inactiveState && cur->current_from() <= from);
+
+			if (range_has_changed) {
+				// remove cur from list
+				*prev = next;
+				if (cur->current_at_end()) {
+					// move to handled state (not maintained as a list)
+					cur->set_state(handledState);
+					interval_moved(cur, kind, state, handledState);
+				} else if (cur->current_from() <= from) {
+					// sort into active list
+					append_sorted(active_first_addr(kind), cur);
+					cur->set_state(activeState);
+					if (*prev == cur) {
+						assert(state == activeState, "check");
+						prev = cur->next_addr();
+					}
+					interval_moved(cur, kind, state, activeState);
+				} else {
+					// sort into inactive list
+					append_sorted(inactive_first_addr(kind), cur);
+					cur->set_state(inactiveState);
+					if (*prev == cur) {
+						assert(state == inactiveState, "check");
+						prev = cur->next_addr();
+					}
+					interval_moved(cur, kind, state, inactiveState);
+				}
+			} else {
+				prev = cur->next_addr();
+				continue;
+			}
+		}
+	}
 }
 
 
 void IntervalWalker::next_interval() {
-  IntervalKind kind;
-  Interval* any   = _unhandled_first[anyKind];
-  Interval* fixed = _unhandled_first[fixedKind];
-
-  if (any != Interval::end()) {
-    // intervals may start at same position -> prefer fixed interval
-    kind = fixed != Interval::end() && fixed->from() <= any->from() ? fixedKind : anyKind;
-
-    assert (kind == fixedKind && fixed->from() <= any->from() ||
-            kind == anyKind   && any->from() <= fixed->from(), "wrong interval!!!");
-    assert(any == Interval::end() || fixed == Interval::end() || any->from() != fixed->from() || kind == fixedKind, "if fixed and any-Interval start at same position, fixed must be processed first");
-
-  } else if (fixed != Interval::end()) {
-    kind = fixedKind;
-  } else {
-    _current = NULL; return;
-  }
-  _current_kind = kind;
-  _current = _unhandled_first[kind];
-  _unhandled_first[kind] = _current->next();
-  _current->set_next(Interval::end());
-  _current->rewind_range();
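+	// pick the next unhandled interval: the one with the smallest start position, preferring
+	// the fixed interval when a fixed and a non-fixed interval start at the same position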
+	IntervalKind kind;
+	Interval* any   = _unhandled_first[anyKind];
+	Interval* fixed = _unhandled_first[fixedKind];
+
+	if (any != Interval::end()) {
+		// intervals may start at same position -> prefer fixed interval
+		kind = fixed != Interval::end() && fixed->from() <= any->from() ? fixedKind : anyKind;
+
+		assert (kind == fixedKind && fixed->from() <= any->from() ||
+				kind == anyKind   && any->from() <= fixed->from(), "wrong interval!!!");
+		assert(any == Interval::end() || fixed == Interval::end() || any->from() != fixed->from() || kind == fixedKind, "if fixed and any-Interval start at same position, fixed must be processed first");
+
+	} else if (fixed != Interval::end()) {
+		kind = fixedKind;
+	} else {
+		_current = NULL; return;
+	}
+	_current_kind = kind;
+	_current = _unhandled_first[kind];
+	_unhandled_first[kind] = _current->next();
+	_current->set_next(Interval::end());
+	_current->rewind_range();
 }
 
 
 void IntervalWalker::walk_to(int lir_op_id) {
-  assert(_current_position <= lir_op_id, "can not walk backwards");
-  while (current() != NULL) {
-    bool is_active = current()->from() <= lir_op_id;
-    int id = is_active ? current()->from() : lir_op_id;
-
-    TRACE_LINEAR_SCAN(2, if (_current_position < id) { tty->cr(); tty->print_cr("walk_to(%d) **************************************************************", id); })
-
-    // set _current_position prior to call of walk_to
-    _current_position = id;
-
-    // call walk_to even if _current_position == id
-    walk_to(activeState, id);
-    walk_to(inactiveState, id);
-
-    if (is_active) {
-      current()->set_state(activeState);
-      if (activate_current()) {
-        append_sorted(active_first_addr(current_kind()), current());
-        interval_moved(current(), current_kind(), unhandledState, activeState);
-      }
-
-      next_interval();
-    } else {
-      return;
-    }
-  }
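+	// main driver loop: activate all unhandled intervals that start at or before lir_op_id,
+	// advancing the active and inactive lists to each interval's start position first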
+	assert(_current_position <= lir_op_id, "can not walk backwards");
+	while (current() != NULL) {
+		bool is_active = current()->from() <= lir_op_id;
+		int id = is_active ? current()->from() : lir_op_id;
+
+		TRACE_LINEAR_SCAN(2, if (_current_position < id) { tty->cr(); tty->print_cr("walk_to(%d) **************************************************************", id); })
+
+		// set _current_position prior to call of walk_to
+		_current_position = id;
+
+		// call walk_to even if _current_position == id
+		walk_to(activeState, id);
+		walk_to(inactiveState, id);
+
+		if (is_active) {
+			current()->set_state(activeState);
+			if (activate_current()) {
+				append_sorted(active_first_addr(current_kind()), current());
+				interval_moved(current(), current_kind(), unhandledState, activeState);
+			}
+
+			next_interval();
+		} else {
+			return;
+		}
+	}
 }
 
 void IntervalWalker::interval_moved(Interval* interval, IntervalKind kind, IntervalState from, IntervalState to) {
 #ifndef PRODUCT
-  if (TraceLinearScanLevel >= 4) {
-    #define print_state(state) \
-    switch(state) {\
-      case unhandledState: tty->print("unhandled"); break;\
-      case activeState: tty->print("active"); break;\
-      case inactiveState: tty->print("inactive"); break;\
-      case handledState: tty->print("handled"); break;\
-      default: ShouldNotReachHere(); \
-    }
-
-    print_state(from); tty->print(" to "); print_state(to);
-    tty->fill_to(23);
-    interval->print();
-
-    #undef print_state
-  }
+	if (TraceLinearScanLevel >= 4) {
+#define print_state(state) \
+		switch(state) {\
+			case unhandledState: tty->print("unhandled"); break;\
+			case activeState: tty->print("active"); break;\
+			case inactiveState: tty->print("inactive"); break;\
+			case handledState: tty->print("handled"); break;\
+			default: ShouldNotReachHere(); \
+		}
+
+		print_state(from); tty->print(" to "); print_state(to);
+		tty->fill_to(23);
+		interval->print();
+
+#undef print_state
+	}
 #endif
 }
 
@@ -4634,1528 +4639,1528 @@
 
 // **** Implementation of LinearScanWalker **************************
 
-LinearScanWalker::LinearScanWalker(LinearScan* allocator, Interval* unhandled_fixed_first, Interval* unhandled_any_first)
-  : IntervalWalker(allocator, unhandled_fixed_first, unhandled_any_first)
-  , _move_resolver(allocator)
+LinearScanWalker::LinearScanWalker(LinearScan* allocator, Interval* unhandled_fixed_first, Interval* unhandled_any_first)
+	: IntervalWalker(allocator, unhandled_fixed_first, unhandled_any_first)
+	, _move_resolver(allocator)
 {
-  for (int i = 0; i < LinearScan::nof_regs; i++) {
-    _spill_intervals[i] = new IntervalList(2);
-  }
+	for (int i = 0; i < LinearScan::nof_regs; i++) {
+		_spill_intervals[i] = new IntervalList(2);
+	}
 }
 
 
 inline void LinearScanWalker::init_use_lists(bool only_process_use_pos) {
-  for (int i = _first_reg; i <= _last_reg; i++) {
-    _use_pos[i] = max_jint;
-
-    if (!only_process_use_pos) {
-      _block_pos[i] = max_jint;
-      _spill_intervals[i]->clear();
-    }
-  }
+	for (int i = _first_reg; i <= _last_reg; i++) {
+		_use_pos[i] = max_jint;
+
+		if (!only_process_use_pos) {
+			_block_pos[i] = max_jint;
+			_spill_intervals[i]->clear();
+		}
+	}
 }
 
 inline void LinearScanWalker::exclude_from_use(int reg) {
-  assert(reg < LinearScan::nof_regs, "interval must have a register assigned (stack slots not allowed)");
-  if (reg >= _first_reg && reg <= _last_reg) {
-    _use_pos[reg] = 0;
-  }
+	assert(reg < LinearScan::nof_regs, "interval must have a register assigned (stack slots not allowed)");
+	if (reg >= _first_reg && reg <= _last_reg) {
+		_use_pos[reg] = 0;
+	}
 }
 inline void LinearScanWalker::exclude_from_use(Interval* i) {
-  assert(i->assigned_reg() != any_reg, "interval has no register assigned");
-
-  exclude_from_use(i->assigned_reg());
-  exclude_from_use(i->assigned_regHi());
+	assert(i->assigned_reg() != any_reg, "interval has no register assigned");
+
+	exclude_from_use(i->assigned_reg());
+	exclude_from_use(i->assigned_regHi());
 }
 
 inline void LinearScanWalker::set_use_pos(int reg, Interval* i, int use_pos, bool only_process_use_pos) {
-  assert(use_pos != 0, "must use exclude_from_use to set use_pos to 0");
-
-  if (reg >= _first_reg && reg <= _last_reg) {
-    if (_use_pos[reg] > use_pos) {
-      _use_pos[reg] = use_pos;
-    }
-    if (!only_process_use_pos) {
-      _spill_intervals[reg]->append(i);
-    }
-  }
+	assert(use_pos != 0, "must use exclude_from_use to set use_pos to 0");
+
+	if (reg >= _first_reg && reg <= _last_reg) {
+		if (_use_pos[reg] > use_pos) {
+			_use_pos[reg] = use_pos;
+		}
+		if (!only_process_use_pos) {
+			_spill_intervals[reg]->append(i);
+		}
+	}
 }
 inline void LinearScanWalker::set_use_pos(Interval* i, int use_pos, bool only_process_use_pos) {
-  assert(i->assigned_reg() != any_reg, "interval has no register assigned");
-  if (use_pos != -1) {
-    set_use_pos(i->assigned_reg(), i, use_pos, only_process_use_pos);
-    set_use_pos(i->assigned_regHi(), i, use_pos, only_process_use_pos);
-  }
+	assert(i->assigned_reg() != any_reg, "interval has no register assigned");
+	if (use_pos != -1) {
+		set_use_pos(i->assigned_reg(), i, use_pos, only_process_use_pos);
+		set_use_pos(i->assigned_regHi(), i, use_pos, only_process_use_pos);
+	}
 }
 
 inline void LinearScanWalker::set_block_pos(int reg, Interval* i, int block_pos) {
-  if (reg >= _first_reg && reg <= _last_reg) {
-    if (_block_pos[reg] > block_pos) {
-      _block_pos[reg] = block_pos;
-    }
-    if (_use_pos[reg] > block_pos) {
-      _use_pos[reg] = block_pos;
-    }
-  }
+	if (reg >= _first_reg && reg <= _last_reg) {
+		if (_block_pos[reg] > block_pos) {
+			_block_pos[reg] = block_pos;
+		}
+		if (_use_pos[reg] > block_pos) {
+			_use_pos[reg] = block_pos;
+		}
+	}
 }
 inline void LinearScanWalker::set_block_pos(Interval* i, int block_pos) {
-  assert(i->assigned_reg() != any_reg, "interval has no register assigned");
-  if (block_pos != -1) {
-    set_block_pos(i->assigned_reg(), i, block_pos);
-    set_block_pos(i->assigned_regHi(), i, block_pos);
-  }
+	assert(i->assigned_reg() != any_reg, "interval has no register assigned");
+	if (block_pos != -1) {
+		set_block_pos(i->assigned_reg(), i, block_pos);
+		set_block_pos(i->assigned_regHi(), i, block_pos);
+	}
 }
 
 
 void LinearScanWalker::free_exclude_active_fixed() {
-  Interval* list = active_first(fixedKind);
-  while (list != Interval::end()) {
-    assert(list->assigned_reg() < LinearScan::nof_regs, "active interval must have a register assigned");
-    exclude_from_use(list);
-    list = list->next();
-  }
+	Interval* list = active_first(fixedKind);
+	while (list != Interval::end()) {
+		assert(list->assigned_reg() < LinearScan::nof_regs, "active interval must have a register assigned");
+		exclude_from_use(list);
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::free_exclude_active_any() {
-  Interval* list = active_first(anyKind);
-  while (list != Interval::end()) {
-    exclude_from_use(list);
-    list = list->next();
-  }
+	Interval* list = active_first(anyKind);
+	while (list != Interval::end()) {
+		exclude_from_use(list);
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::free_collect_inactive_fixed(Interval* cur) {
-  Interval* list = inactive_first(fixedKind);
-  while (list != Interval::end()) {
-    if (cur->to() <= list->current_from()) {
-      assert(list->current_intersects_at(cur) == -1, "must not intersect");
-      set_use_pos(list, list->current_from(), true);
-    } else {
-      set_use_pos(list, list->current_intersects_at(cur), true);
-    }
-    list = list->next();
-  }
+	Interval* list = inactive_first(fixedKind);
+	while (list != Interval::end()) {
+		if (cur->to() <= list->current_from()) {
+			assert(list->current_intersects_at(cur) == -1, "must not intersect");
+			set_use_pos(list, list->current_from(), true);
+		} else {
+			set_use_pos(list, list->current_intersects_at(cur), true);
+		}
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::free_collect_inactive_any(Interval* cur) {
-  Interval* list = inactive_first(anyKind);
-  while (list != Interval::end()) {
-    set_use_pos(list, list->current_intersects_at(cur), true);
-    list = list->next();
-  }
+	Interval* list = inactive_first(anyKind);
+	while (list != Interval::end()) {
+		set_use_pos(list, list->current_intersects_at(cur), true);
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::free_collect_unhandled(IntervalKind kind, Interval* cur) {
-  Interval* list = unhandled_first(kind);
-  while (list != Interval::end()) {
-    set_use_pos(list, list->intersects_at(cur), true);
-    if (kind == fixedKind && cur->to() <= list->from()) {
-      set_use_pos(list, list->from(), true);
-    }
-    list = list->next();
-  }
+	Interval* list = unhandled_first(kind);
+	while (list != Interval::end()) {
+		set_use_pos(list, list->intersects_at(cur), true);
+		if (kind == fixedKind && cur->to() <= list->from()) {
+			set_use_pos(list, list->from(), true);
+		}
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::spill_exclude_active_fixed() {
-  Interval* list = active_first(fixedKind);
-  while (list != Interval::end()) {
-    exclude_from_use(list);
-    list = list->next();
-  }
+	Interval* list = active_first(fixedKind);
+	while (list != Interval::end()) {
+		exclude_from_use(list);
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::spill_block_unhandled_fixed(Interval* cur) {
-  Interval* list = unhandled_first(fixedKind);
-  while (list != Interval::end()) {
-    set_block_pos(list, list->intersects_at(cur));
-    list = list->next();
-  }
+	Interval* list = unhandled_first(fixedKind);
+	while (list != Interval::end()) {
+		set_block_pos(list, list->intersects_at(cur));
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::spill_block_inactive_fixed(Interval* cur) {
-  Interval* list = inactive_first(fixedKind);
-  while (list != Interval::end()) {
-    if (cur->to() > list->current_from()) {
-      set_block_pos(list, list->current_intersects_at(cur));
-    } else {
-      assert(list->current_intersects_at(cur) == -1, "invalid optimization: intervals intersect");
-    }
-
-    list = list->next();
-  }
+	Interval* list = inactive_first(fixedKind);
+	while (list != Interval::end()) {
+		if (cur->to() > list->current_from()) {
+			set_block_pos(list, list->current_intersects_at(cur));
+		} else {
+			assert(list->current_intersects_at(cur) == -1, "invalid optimization: intervals intersect");
+		}
+
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::spill_collect_active_any() {
-  Interval* list = active_first(anyKind);
-  while (list != Interval::end()) {
-    set_use_pos(list, MIN2(list->next_usage(loopEndMarker, _current_position), list->to()), false);
-    list = list->next();
-  }
+	Interval* list = active_first(anyKind);
+	while (list != Interval::end()) {
+		set_use_pos(list, MIN2(list->next_usage(loopEndMarker, _current_position), list->to()), false);
+		list = list->next();
+	}
 }
 
 void LinearScanWalker::spill_collect_inactive_any(Interval* cur) {
-  Interval* list = inactive_first(anyKind);
-  while (list != Interval::end()) {
-    if (list->current_intersects(cur)) {
-      set_use_pos(list, MIN2(list->next_usage(loopEndMarker, _current_position), list->to()), false);
-    }
-    list = list->next();
-  }
+	Interval* list = inactive_first(anyKind);
+	while (list != Interval::end()) {
+		if (list->current_intersects(cur)) {
+			set_use_pos(list, MIN2(list->next_usage(loopEndMarker, _current_position), list->to()), false);
+		}
+		list = list->next();
+	}
 }
 
 
 void LinearScanWalker::insert_move(int op_id, Interval* src_it, Interval* dst_it) {
-  // output all moves here. When source and target are equal, the move is
-  // optimized away later in assign_reg_nums
-
-  op_id = (op_id + 1) & ~1;
-  BlockBegin* op_block = allocator()->block_of_op_with_id(op_id);
-  assert(op_id > 0 && allocator()->block_of_op_with_id(op_id - 2) == op_block, "cannot insert move at block boundary");
-
-  // calculate index of instruction inside instruction list of current block
-  // the minimal index (for a block with no spill moves) can be calculated because the
-  // numbering of instructions is known.
-  // When the block already contains spill moves, the index must be increased until the
-  // correct index is reached.
-  LIR_OpList* list = op_block->lir()->instructions_list();
-  int index = (op_id - list->at(0)->id()) / 2;
-  assert(list->at(index)->id() <= op_id, "error in calculation");
-
-  while (list->at(index)->id() != op_id) {
-    index++;
-    assert(0 <= index && index < list->length(), "index out of bounds");
-  }
-  assert(1 <= index && index < list->length(), "index out of bounds");
-  assert(list->at(index)->id() == op_id, "error in calculation");
-
-  // insert new instruction before instruction at position index
-  _move_resolver.move_insert_position(op_block->lir(), index - 1);
-  _move_resolver.add_mapping(src_it, dst_it);
+	// output all moves here. When source and target are equal, the move is
+	// optimized away later in assign_reg_nums
+
+	op_id = (op_id + 1) & ~1;
+	BlockBegin* op_block = allocator()->block_of_op_with_id(op_id);
+	assert(op_id > 0 && allocator()->block_of_op_with_id(op_id - 2) == op_block, "cannot insert move at block boundary");
+
+	// calculate index of instruction inside instruction list of current block
+	// the minimal index (for a block with no spill moves) can be calculated because the
+	// numbering of instructions is known.
+	// When the block already contains spill moves, the index must be increased until the
+	// correct index is reached.
+	LIR_OpList* list = op_block->lir()->instructions_list();
+	int index = (op_id - list->at(0)->id()) / 2;
+	assert(list->at(index)->id() <= op_id, "error in calculation");
+
+	while (list->at(index)->id() != op_id) {
+		index++;
+		assert(0 <= index && index < list->length(), "index out of bounds");
+	}
+	assert(1 <= index && index < list->length(), "index out of bounds");
+	assert(list->at(index)->id() == op_id, "error in calculation");
+
+	// insert new instruction before instruction at position index
+	_move_resolver.move_insert_position(op_block->lir(), index - 1);
+	_move_resolver.add_mapping(src_it, dst_it);
 }
 
 
 int LinearScanWalker::find_optimal_split_pos(BlockBegin* min_block, BlockBegin* max_block, int max_split_pos) {
-  int from_block_nr = min_block->linear_scan_number();
-  int to_block_nr = max_block->linear_scan_number();
-
-  assert(0 <= from_block_nr && from_block_nr < block_count(), "out of range");
-  assert(0 <= to_block_nr && to_block_nr < block_count(), "out of range");
-  assert(from_block_nr < to_block_nr, "must cross block boundary");
-
-  // Try to split at end of max_block. If this would be after
-  // max_split_pos, then use the begin of max_block
-  int optimal_split_pos = max_block->last_lir_instruction_id() + 2;
-  if (optimal_split_pos > max_split_pos) {
-    optimal_split_pos = max_block->first_lir_instruction_id();
-  }
-
-  int min_loop_depth = max_block->loop_depth();
-  for (int i = to_block_nr - 1; i >= from_block_nr; i--) {
-    BlockBegin* cur = block_at(i);
-
-    if (cur->loop_depth() < min_loop_depth) {
-      // block with lower loop-depth found -> split at the end of this block
-      min_loop_depth = cur->loop_depth();
-      optimal_split_pos = cur->last_lir_instruction_id() + 2;
-    }
-  }
-  assert(optimal_split_pos > allocator()->max_lir_op_id() || allocator()->is_block_begin(optimal_split_pos), "algorithm must move split pos to block boundary");
-
-  return optimal_split_pos;
+	int from_block_nr = min_block->linear_scan_number();
+	int to_block_nr = max_block->linear_scan_number();
+
+	assert(0 <= from_block_nr && from_block_nr < block_count(), "out of range");
+	assert(0 <= to_block_nr && to_block_nr < block_count(), "out of range");
+	assert(from_block_nr < to_block_nr, "must cross block boundary");
+
+	// Try to split at end of max_block. If this would be after
+	// max_split_pos, then use the beginning of max_block
+	int optimal_split_pos = max_block->last_lir_instruction_id() + 2;
+	if (optimal_split_pos > max_split_pos) {
+		optimal_split_pos = max_block->first_lir_instruction_id();
+	}
+
+	int min_loop_depth = max_block->loop_depth();
+	for (int i = to_block_nr - 1; i >= from_block_nr; i--) {
+		BlockBegin* cur = block_at(i);
+
+		if (cur->loop_depth() < min_loop_depth) {
+			// block with lower loop-depth found -> split at the end of this block
+			min_loop_depth = cur->loop_depth();
+			optimal_split_pos = cur->last_lir_instruction_id() + 2;
+		}
+	}
+	assert(optimal_split_pos > allocator()->max_lir_op_id() || allocator()->is_block_begin(optimal_split_pos), "algorithm must move split pos to block boundary");
+
+	return optimal_split_pos;
 }
 
 
 int LinearScanWalker::find_optimal_split_pos(Interval* it, int min_split_pos, int max_split_pos, bool do_loop_optimization) {
-  int optimal_split_pos = -1;
-  if (min_split_pos == max_split_pos) {
-    // trivial case, no optimization of split position possible
-    TRACE_LINEAR_SCAN(4, tty->print_cr("      min-pos and max-pos are equal, no optimization possible"));
-    optimal_split_pos = min_split_pos;
-
-  } else {
-    assert(min_split_pos < max_split_pos, "must be true then");
-    assert(min_split_pos > 0, "cannot access min_split_pos - 1 otherwise");
-
-    // reason for using min_split_pos - 1: when the minimal split pos is exactly at the
-    // beginning of a block, then min_split_pos is also a possible split position.
-    // Use the block before as min_block, because then min_block->last_lir_instruction_id() + 2 == min_split_pos
-    BlockBegin* min_block = allocator()->block_of_op_with_id(min_split_pos - 1);
-
-    // reason for using max_split_pos - 1: otherwise there would be an assertion failure
-    // when an interval ends at the end of the last block of the method
-    // (in this case, max_split_pos == allocator()->max_lir_op_id() + 2, and there is no
-    // block at this op_id)
-    BlockBegin* max_block = allocator()->block_of_op_with_id(max_split_pos - 1);
-
-    assert(min_block->linear_scan_number() <= max_block->linear_scan_number(), "invalid order");
-    if (min_block == max_block) {
-      // split position cannot be moved to block boundary, so split as late as possible
-      TRACE_LINEAR_SCAN(4, tty->print_cr("      cannot move split pos to block boundary because min_pos and max_pos are in same block"));
-      optimal_split_pos = max_split_pos;
-
-    } else if (it->has_hole_between(max_split_pos - 1, max_split_pos) && !allocator()->is_block_begin(max_split_pos)) {
-      // Do not move split position if the interval has a hole before max_split_pos.
-      // Intervals resulting from Phi-Functions have more than one definition (marked
-      // as mustHaveRegister) with a hole before each definition. When the register is needed
-      // for the second definition, an earlier reloading is unnecessary.
-      TRACE_LINEAR_SCAN(4, tty->print_cr("      interval has hole just before max_split_pos, so splitting at max_split_pos"));
-      optimal_split_pos = max_split_pos;
-
-    } else {
-      // seach optimal block boundary between min_split_pos and max_split_pos
-      TRACE_LINEAR_SCAN(4, tty->print_cr("      moving split pos to optimal block boundary between block B%d and B%d", min_block->block_id(), max_block->block_id()));
-
-      if (do_loop_optimization) {
-        // Loop optimization: if a loop-end marker is found between min- and max-position,
-        // then split before this loop
-        int loop_end_pos = it->next_usage_exact(loopEndMarker, min_block->last_lir_instruction_id() + 2);
-        TRACE_LINEAR_SCAN(4, tty->print_cr("      loop optimization: loop end found at pos %d", loop_end_pos));
-
-        assert(loop_end_pos > min_split_pos, "invalid order");
-        if (loop_end_pos < max_split_pos) {
-          // loop-end marker found between min- and max-position
-          // if it is not the end marker for the same loop as the min-position, then move
-          // the max-position to this loop block.
-          // Desired result: uses tagged as shouldHaveRegister inside a loop cause a reloading
-          // of the interval (normally, only mustHaveRegister causes a reloading)
-          BlockBegin* loop_block = allocator()->block_of_op_with_id(loop_end_pos);
-
-          TRACE_LINEAR_SCAN(4, tty->print_cr("      interval is used in loop that ends in block B%d, so trying to move max_block back from B%d to B%d", loop_block->block_id(), max_block->block_id(), loop_block->block_id()));
-          assert(loop_block != min_block, "loop_block and min_block must be different because block boundary is needed between");
-
-          optimal_split_pos = find_optimal_split_pos(min_block, loop_block, loop_block->last_lir_instruction_id() + 2);
-          if (optimal_split_pos == loop_block->last_lir_instruction_id() + 2) {
-            optimal_split_pos = -1;
-            TRACE_LINEAR_SCAN(4, tty->print_cr("      loop optimization not necessary"));
-          } else {
-            TRACE_LINEAR_SCAN(4, tty->print_cr("      loop optimization successful"));
-          }
-        }
-      }
-
-      if (optimal_split_pos == -1) {
-        // not calculated by loop optimization
-        optimal_split_pos = find_optimal_split_pos(min_block, max_block, max_split_pos);
-      }
-    }
-  }
-  TRACE_LINEAR_SCAN(4, tty->print_cr("      optimal split position: %d", optimal_split_pos));
-
-  return optimal_split_pos;
+	int optimal_split_pos = -1;
+	if (min_split_pos == max_split_pos) {
+		// trivial case, no optimization of split position possible
+		TRACE_LINEAR_SCAN(4, tty->print_cr("      min-pos and max-pos are equal, no optimization possible"));
+		optimal_split_pos = min_split_pos;
+
+	} else {
+		assert(min_split_pos < max_split_pos, "must be true then");
+		assert(min_split_pos > 0, "cannot access min_split_pos - 1 otherwise");
+
+		// reason for using min_split_pos - 1: when the minimal split pos is exactly at the
+		// beginning of a block, then min_split_pos is also a possible split position.
+		// Use the block before as min_block, because then min_block->last_lir_instruction_id() + 2 == min_split_pos
+		BlockBegin* min_block = allocator()->block_of_op_with_id(min_split_pos - 1);
+
+		// reason for using max_split_pos - 1: otherwise there would be an assertion failure
+		// when an interval ends at the end of the last block of the method
+		// (in this case, max_split_pos == allocator()->max_lir_op_id() + 2, and there is no
+		// block at this op_id)
+		BlockBegin* max_block = allocator()->block_of_op_with_id(max_split_pos - 1);
+
+		assert(min_block->linear_scan_number() <= max_block->linear_scan_number(), "invalid order");
+		if (min_block == max_block) {
+			// split position cannot be moved to block boundary, so split as late as possible
+			TRACE_LINEAR_SCAN(4, tty->print_cr("      cannot move split pos to block boundary because min_pos and max_pos are in same block"));
+			optimal_split_pos = max_split_pos;
+
+		} else if (it->has_hole_between(max_split_pos - 1, max_split_pos) && !allocator()->is_block_begin(max_split_pos)) {
+			// Do not move split position if the interval has a hole before max_split_pos.
+			// Intervals resulting from Phi-Functions have more than one definition (marked
+			// as mustHaveRegister) with a hole before each definition. When the register is needed
+			// for the second definition, an earlier reloading is unnecessary.
+			TRACE_LINEAR_SCAN(4, tty->print_cr("      interval has hole just before max_split_pos, so splitting at max_split_pos"));
+			optimal_split_pos = max_split_pos;
+
+		} else {
+			// search optimal block boundary between min_split_pos and max_split_pos
+			TRACE_LINEAR_SCAN(4, tty->print_cr("      moving split pos to optimal block boundary between block B%d and B%d", min_block->block_id(), max_block->block_id()));
+
+			if (do_loop_optimization) {
+				// Loop optimization: if a loop-end marker is found between min- and max-position,
+				// then split before this loop
+				int loop_end_pos = it->next_usage_exact(loopEndMarker, min_block->last_lir_instruction_id() + 2);
+				TRACE_LINEAR_SCAN(4, tty->print_cr("      loop optimization: loop end found at pos %d", loop_end_pos));
+
+				assert(loop_end_pos > min_split_pos, "invalid order");
+				if (loop_end_pos < max_split_pos) {
+					// loop-end marker found between min- and max-position
+					// if it is not the end marker for the same loop as the min-position, then move
+					// the max-position to this loop block.
+					// Desired result: uses tagged as shouldHaveRegister inside a loop cause a reloading
+					// of the interval (normally, only mustHaveRegister causes a reloading)
+					BlockBegin* loop_block = allocator()->block_of_op_with_id(loop_end_pos);
+
+					TRACE_LINEAR_SCAN(4, tty->print_cr("      interval is used in loop that ends in block B%d, so trying to move max_block back from B%d to B%d", loop_block->block_id(), max_block->block_id(), loop_block->block_id()));
+					assert(loop_block != min_block, "loop_block and min_block must be different because block boundary is needed between");
+
+					optimal_split_pos = find_optimal_split_pos(min_block, loop_block, loop_block->last_lir_instruction_id() + 2);
+					if (optimal_split_pos == loop_block->last_lir_instruction_id() + 2) {
+						optimal_split_pos = -1;
+						TRACE_LINEAR_SCAN(4, tty->print_cr("      loop optimization not necessary"));
+					} else {
+						TRACE_LINEAR_SCAN(4, tty->print_cr("      loop optimization successful"));
+					}
+				}
+			}
+
+			if (optimal_split_pos == -1) {
+				// not calculated by loop optimization
+				optimal_split_pos = find_optimal_split_pos(min_block, max_block, max_split_pos);
+			}
+		}
+	}
+	TRACE_LINEAR_SCAN(4, tty->print_cr("      optimal split position: %d", optimal_split_pos));
+
+	return optimal_split_pos;
 }
 
 
 /*
-  split an interval at the optimal position between min_split_pos and
-  max_split_pos in two parts:
-  1) the left part has already a location assigned
-  2) the right part is sorted into to the unhandled-list
-*/
+   split an interval at the optimal position between min_split_pos and
+   max_split_pos in two parts:
+   1) the left part already has a location assigned
+   2) the right part is sorted into the unhandled-list
+ */
 void LinearScanWalker::split_before_usage(Interval* it, int min_split_pos, int max_split_pos) {
-  TRACE_LINEAR_SCAN(2, tty->print   ("----- splitting interval: "); it->print());
-  TRACE_LINEAR_SCAN(2, tty->print_cr("      between %d and %d", min_split_pos, max_split_pos));
-
-  assert(it->from() < min_split_pos,         "cannot split at start of interval");
-  assert(current_position() < min_split_pos, "cannot split before current position");
-  assert(min_split_pos <= max_split_pos,     "invalid order");
-  assert(max_split_pos <= it->to(),          "cannot split after end of interval");
-
-  int optimal_split_pos = find_optimal_split_pos(it, min_split_pos, max_split_pos, true);
-
-  assert(min_split_pos <= optimal_split_pos && optimal_split_pos <= max_split_pos, "out of range");
-  assert(optimal_split_pos <= it->to(),  "cannot split after end of interval");
-  assert(optimal_split_pos > it->from(), "cannot split at start of interval");
-
-  if (optimal_split_pos == it->to() && it->next_usage(mustHaveRegister, min_split_pos) == max_jint) {
-    // the split position would be just before the end of the interval
-    // -> no split at all necessary
-    TRACE_LINEAR_SCAN(4, tty->print_cr("      no split necessary because optimal split position is at end of interval"));
-    return;
-  }
-
-  // must calculate this before the actual split is performed and before split position is moved to odd op_id
-  bool move_necessary = !allocator()->is_block_begin(optimal_split_pos) && !it->has_hole_between(optimal_split_pos - 1, optimal_split_pos);
-
-  if (!allocator()->is_block_begin(optimal_split_pos)) {
-    // move position before actual instruction (odd op_id)
-    optimal_split_pos = (optimal_split_pos - 1) | 1;
-  }
-
-  TRACE_LINEAR_SCAN(4, tty->print_cr("      splitting at position %d", optimal_split_pos));
-  assert(allocator()->is_block_begin(optimal_split_pos) || (optimal_split_pos % 2 == 1), "split pos must be odd when not on block boundary");
-  assert(!allocator()->is_block_begin(optimal_split_pos) || (optimal_split_pos % 2 == 0), "split pos must be even on block boundary");
-
-  Interval* split_part = it->split(optimal_split_pos);
-
-  allocator()->append_interval(split_part);
-  allocator()->copy_register_flags(it, split_part);
-  split_part->set_insert_move_when_activated(move_necessary);
-  append_to_unhandled(unhandled_first_addr(anyKind), split_part);
-
-  TRACE_LINEAR_SCAN(2, tty->print_cr("      split interval in two parts (insert_move_when_activated: %d)", move_necessary));
-  TRACE_LINEAR_SCAN(2, tty->print   ("      "); it->print());
-  TRACE_LINEAR_SCAN(2, tty->print   ("      "); split_part->print());
+	TRACE_LINEAR_SCAN(2, tty->print   ("----- splitting interval: "); it->print());
+	TRACE_LINEAR_SCAN(2, tty->print_cr("      between %d and %d", min_split_pos, max_split_pos));
+
+	assert(it->from() < min_split_pos,         "cannot split at start of interval");
+	assert(current_position() < min_split_pos, "cannot split before current position");
+	assert(min_split_pos <= max_split_pos,     "invalid order");
+	assert(max_split_pos <= it->to(),          "cannot split after end of interval");
+
+	int optimal_split_pos = find_optimal_split_pos(it, min_split_pos, max_split_pos, true);
+
+	assert(min_split_pos <= optimal_split_pos && optimal_split_pos <= max_split_pos, "out of range");
+	assert(optimal_split_pos <= it->to(),  "cannot split after end of interval");
+	assert(optimal_split_pos > it->from(), "cannot split at start of interval");
+
+	if (optimal_split_pos == it->to() && it->next_usage(mustHaveRegister, min_split_pos) == max_jint) {
+		// the split position would be just before the end of the interval
+		// -> no split at all necessary
+		TRACE_LINEAR_SCAN(4, tty->print_cr("      no split necessary because optimal split position is at end of interval"));
+		return;
+	}
+
+	// must calculate this before the actual split is performed and before split position is moved to odd op_id
+	bool move_necessary = !allocator()->is_block_begin(optimal_split_pos) && !it->has_hole_between(optimal_split_pos - 1, optimal_split_pos);
+
+	if (!allocator()->is_block_begin(optimal_split_pos)) {
+		// move position before actual instruction (odd op_id)
+		optimal_split_pos = (optimal_split_pos - 1) | 1;
+	}
+
+	TRACE_LINEAR_SCAN(4, tty->print_cr("      splitting at position %d", optimal_split_pos));
+	assert(allocator()->is_block_begin(optimal_split_pos) || (optimal_split_pos % 2 == 1), "split pos must be odd when not on block boundary");
+	assert(!allocator()->is_block_begin(optimal_split_pos) || (optimal_split_pos % 2 == 0), "split pos must be even on block boundary");
+
+	Interval* split_part = it->split(optimal_split_pos);
+
+	allocator()->append_interval(split_part);
+	allocator()->copy_register_flags(it, split_part);
+	split_part->set_insert_move_when_activated(move_necessary);
+	append_to_unhandled(unhandled_first_addr(anyKind), split_part);
+
+	TRACE_LINEAR_SCAN(2, tty->print_cr("      split interval in two parts (insert_move_when_activated: %d)", move_necessary));
+	TRACE_LINEAR_SCAN(2, tty->print   ("      "); it->print());
+	TRACE_LINEAR_SCAN(2, tty->print   ("      "); split_part->print());
 }
 
 /*
-  split an interval at the optimal position between min_split_pos and
-  max_split_pos in two parts:
-  1) the left part has already a location assigned
-  2) the right part is always on the stack and therefore ignored in further processing
-*/
+   split an interval at the optimal position between min_split_pos and
+   max_split_pos in two parts:
+   1) the left part already has a location assigned
+   2) the right part is always on the stack and therefore ignored in further processing
+ */
 void LinearScanWalker::split_for_spilling(Interval* it) {
-  // calculate allowed range of splitting position
-  int max_split_pos = current_position();
-  int min_split_pos = MAX2(it->previous_usage(shouldHaveRegister, max_split_pos) + 1, it->from());
-
-  TRACE_LINEAR_SCAN(2, tty->print   ("----- splitting and spilling interval: "); it->print());
-  TRACE_LINEAR_SCAN(2, tty->print_cr("      between %d and %d", min_split_pos, max_split_pos));
-
-  assert(it->state() == activeState,     "why spill interval that is not active?");
-  assert(it->from() <= min_split_pos,    "cannot split before start of interval");
-  assert(min_split_pos <= max_split_pos, "invalid order");
-  assert(max_split_pos < it->to(),       "cannot split at end end of interval");
-  assert(current_position() < it->to(),  "interval must not end before current position");
-
-  if (min_split_pos == it->from()) {
-    // the whole interval is never used, so spill it entirely to memory
-    TRACE_LINEAR_SCAN(2, tty->print_cr("      spilling entire interval because split pos is at beginning of interval"));
-    assert(it->first_usage(shouldHaveRegister) > current_position(), "interval must not have use position before current_position");
-
-    allocator()->assign_spill_slot(it);
-    allocator()->change_spill_state(it, min_split_pos);
-
-    // Also kick parent intervals out of register to memory when they have no use
-    // position. This avoids short interval in register surrounded by intervals in
-    // memory -> avoid useless moves from memory to register and back
-    Interval* parent = it;
-    while (parent != NULL && parent->is_split_child()) {
-      parent = parent->split_child_before_op_id(parent->from());
-
-      if (parent->assigned_reg() < LinearScan::nof_regs) {
-        if (parent->first_usage(shouldHaveRegister) == max_jint) {
-          // parent is never used, so kick it out of its assigned register
-          TRACE_LINEAR_SCAN(4, tty->print_cr("      kicking out interval %d out of its register because it is never used", parent->reg_num()));
-          allocator()->assign_spill_slot(parent);
-        } else {
-          // do not go further back because the register is actually used by the interval
-          parent = NULL;
-        }
-      }
-    }
-
-  } else {
-    // search optimal split pos, split interval and spill only the right hand part
-    int optimal_split_pos = find_optimal_split_pos(it, min_split_pos, max_split_pos, false);
-
-    assert(min_split_pos <= optimal_split_pos && optimal_split_pos <= max_split_pos, "out of range");
-    assert(optimal_split_pos < it->to(), "cannot split at end of interval");
-    assert(optimal_split_pos >= it->from(), "cannot split before start of interval");
-
-    if (!allocator()->is_block_begin(optimal_split_pos)) {
-      // move position before actual instruction (odd op_id)
-      optimal_split_pos = (optimal_split_pos - 1) | 1;
-    }
-
-    TRACE_LINEAR_SCAN(4, tty->print_cr("      splitting at position %d", optimal_split_pos));
-    assert(allocator()->is_block_begin(optimal_split_pos)  || (optimal_split_pos % 2 == 1), "split pos must be odd when not on block boundary");
-    assert(!allocator()->is_block_begin(optimal_split_pos) || (optimal_split_pos % 2 == 0), "split pos must be even on block boundary");
-
-    Interval* spilled_part = it->split(optimal_split_pos);
-    allocator()->append_interval(spilled_part);
-    allocator()->assign_spill_slot(spilled_part);
-    allocator()->change_spill_state(spilled_part, optimal_split_pos);
-
-    if (!allocator()->is_block_begin(optimal_split_pos)) {
-      TRACE_LINEAR_SCAN(4, tty->print_cr("      inserting move from interval %d to %d", it->reg_num(), spilled_part->reg_num()));
-      insert_move(optimal_split_pos, it, spilled_part);
-    }
-
-    // the current_split_child is needed later when moves are inserted for reloading
-    assert(spilled_part->current_split_child() == it, "overwriting wrong current_split_child");
-    spilled_part->make_current_split_child();
-
-    TRACE_LINEAR_SCAN(2, tty->print_cr("      split interval in two parts"));
-    TRACE_LINEAR_SCAN(2, tty->print   ("      "); it->print());
-    TRACE_LINEAR_SCAN(2, tty->print   ("      "); spilled_part->print());
-  }
+	// calculate allowed range of splitting position
+	int max_split_pos = current_position();
+	int min_split_pos = MAX2(it->previous_usage(shouldHaveRegister, max_split_pos) + 1, it->from());
+
+	TRACE_LINEAR_SCAN(2, tty->print   ("----- splitting and spilling interval: "); it->print());
+	TRACE_LINEAR_SCAN(2, tty->print_cr("      between %d and %d", min_split_pos, max_split_pos));
+
+	assert(it->state() == activeState,     "why spill interval that is not active?");
+	assert(it->from() <= min_split_pos,    "cannot split before start of interval");
+	assert(min_split_pos <= max_split_pos, "invalid order");
+	assert(max_split_pos < it->to(),       "cannot split at end of interval");
+	assert(current_position() < it->to(),  "interval must not end before current position");
+
+	if (min_split_pos == it->from()) {
+		// the whole interval is never used, so spill it entirely to memory
+		TRACE_LINEAR_SCAN(2, tty->print_cr("      spilling entire interval because split pos is at beginning of interval"));
+		assert(it->first_usage(shouldHaveRegister) > current_position(), "interval must not have use position before current_position");
+
+		allocator()->assign_spill_slot(it);
+		allocator()->change_spill_state(it, min_split_pos);
+
+		// Also kick parent intervals out of register to memory when they have no use
+		// position. This avoids a short interval in a register surrounded by intervals in
+		// memory -> avoids useless moves from memory to register and back
+		Interval* parent = it;
+		while (parent != NULL && parent->is_split_child()) {
+			parent = parent->split_child_before_op_id(parent->from());
+
+			if (parent->assigned_reg() < LinearScan::nof_regs) {
+				if (parent->first_usage(shouldHaveRegister) == max_jint) {
+					// parent is never used, so kick it out of its assigned register
+					TRACE_LINEAR_SCAN(4, tty->print_cr("      kicking out interval %d out of its register because it is never used", parent->reg_num()));
+					allocator()->assign_spill_slot(parent);
+				} else {
+					// do not go further back because the register is actually used by the interval
+					parent = NULL;
+				}
+			}
+		}
+
+	} else {
+		// search optimal split pos, split interval and spill only the right hand part
+		int optimal_split_pos = find_optimal_split_pos(it, min_split_pos, max_split_pos, false);
+
+		assert(min_split_pos <= optimal_split_pos && optimal_split_pos <= max_split_pos, "out of range");
+		assert(optimal_split_pos < it->to(), "cannot split at end of interval");
+		assert(optimal_split_pos >= it->from(), "cannot split before start of interval");
+
+		if (!allocator()->is_block_begin(optimal_split_pos)) {
+			// move position before actual instruction (odd op_id)
+			optimal_split_pos = (optimal_split_pos - 1) | 1;
+		}
+
+		TRACE_LINEAR_SCAN(4, tty->print_cr("      splitting at position %d", optimal_split_pos));
+		assert(allocator()->is_block_begin(optimal_split_pos)  || (optimal_split_pos % 2 == 1), "split pos must be odd when not on block boundary");
+		assert(!allocator()->is_block_begin(optimal_split_pos) || (optimal_split_pos % 2 == 0), "split pos must be even on block boundary");
+
+		Interval* spilled_part = it->split(optimal_split_pos);
+		allocator()->append_interval(spilled_part);
+		allocator()->assign_spill_slot(spilled_part);
+		allocator()->change_spill_state(spilled_part, optimal_split_pos);
+
+		if (!allocator()->is_block_begin(optimal_split_pos)) {
+			TRACE_LINEAR_SCAN(4, tty->print_cr("      inserting move from interval %d to %d", it->reg_num(), spilled_part->reg_num()));
+			insert_move(optimal_split_pos, it, spilled_part);
+		}
+
+		// the current_split_child is needed later when moves are inserted for reloading
+		assert(spilled_part->current_split_child() == it, "overwriting wrong current_split_child");
+		spilled_part->make_current_split_child();
+
+		TRACE_LINEAR_SCAN(2, tty->print_cr("      split interval in two parts"));
+		TRACE_LINEAR_SCAN(2, tty->print   ("      "); it->print());
+		TRACE_LINEAR_SCAN(2, tty->print   ("      "); spilled_part->print());
+	}
 }
 
 
 void LinearScanWalker::split_stack_interval(Interval* it) {
-  int min_split_pos = current_position() + 1;
-  int max_split_pos = MIN2(it->first_usage(shouldHaveRegister), it->to());
-
-  split_before_usage(it, min_split_pos, max_split_pos);
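+	// the interval is currently on the stack: split it before its first use that should have
+	// a register, so only the later part competes for a register when it becomes active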
+	int min_split_pos = current_position() + 1;
+	int max_split_pos = MIN2(it->first_usage(shouldHaveRegister), it->to());
+
+	split_before_usage(it, min_split_pos, max_split_pos);
 }
 
 void LinearScanWalker::split_when_partial_register_available(Interval* it, int register_available_until) {
-  int min_split_pos = MAX2(it->previous_usage(shouldHaveRegister, register_available_until), it->from() + 1);
-  int max_split_pos = register_available_until;
-
-  split_before_usage(it, min_split_pos, max_split_pos);
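+	// the register is only available up to register_available_until: split the interval so
+	// that the first part keeps the register and the remainder is handled again later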
+	int min_split_pos = MAX2(it->previous_usage(shouldHaveRegister, register_available_until), it->from() + 1);
+	int max_split_pos = register_available_until;
+
+	split_before_usage(it, min_split_pos, max_split_pos);
 }
 
 void LinearScanWalker::split_and_spill_interval(Interval* it) {
-  assert(it->state() == activeState || it->state() == inactiveState, "other states not allowed");
-
-  int current_pos = current_position();
-  if (it->state() == inactiveState) {
-    // the interval is currently inactive, so no spill slot is needed for now.
-    // when the split part is activated, the interval has a new chance to get a register,
-    // so in the best case no stack slot is necessary
-    assert(it->has_hole_between(current_pos - 1, current_pos + 1), "interval can not be inactive otherwise");
-    split_before_usage(it, current_pos + 1, current_pos + 1);
-
-  } else {
-    // search the position where the interval must have a register and split
-    // at the optimal position before.
-    // The new created part is added to the unhandled list and will get a register
-    // when it is activated
-    int min_split_pos = current_pos + 1;
-    int max_split_pos = MIN2(it->next_usage(mustHaveRegister, min_split_pos), it->to());
-
-    split_before_usage(it, min_split_pos, max_split_pos);
-
-    assert(it->next_usage(mustHaveRegister, current_pos) == max_jint, "the remaining part is spilled to stack and therefore has no register");
-    split_for_spilling(it);
-  }
+	assert(it->state() == activeState || it->state() == inactiveState, "other states not allowed");
+
+	int current_pos = current_position();
+	if (it->state() == inactiveState) {
+		// the interval is currently inactive, so no spill slot is needed for now.
+		// when the split part is activated, the interval has a new chance to get a register,
+		// so in the best case no stack slot is necessary
+		assert(it->has_hole_between(current_pos - 1, current_pos + 1), "interval can not be inactive otherwise");
+		split_before_usage(it, current_pos + 1, current_pos + 1);
+
+	} else {
+		// search the position where the interval must have a register and split
+		// at the optimal position before.
+		// The newly created part is added to the unhandled list and will get a register
+		// when it is activated
+		int min_split_pos = current_pos + 1;
+		int max_split_pos = MIN2(it->next_usage(mustHaveRegister, min_split_pos), it->to());
+
+		split_before_usage(it, min_split_pos, max_split_pos);
+
+		assert(it->next_usage(mustHaveRegister, current_pos) == max_jint, "the remaining part is spilled to stack and therefore has no register");
+		split_for_spilling(it);
+	}
 }
 
 
 int LinearScanWalker::find_free_reg(int reg_needed_until, int interval_to, int hint_reg, int ignore_reg, bool* need_split) {
-  int min_full_reg = any_reg;
-  int max_partial_reg = any_reg;
-
-  for (int i = _first_reg; i <= _last_reg; i++) {
-    if (i == ignore_reg) {
-      // this register must be ignored
-
-    } else if (_use_pos[i] >= interval_to) {
-      // this register is free for the full interval
-      if (min_full_reg == any_reg || i == hint_reg || (_use_pos[i] < _use_pos[min_full_reg] && min_full_reg != hint_reg)) {
-        min_full_reg = i;
-      }
-    } else if (_use_pos[i] > reg_needed_until) {
-      // this register is at least free until reg_needed_until
-      if (max_partial_reg == any_reg || i == hint_reg || (_use_pos[i] > _use_pos[max_partial_reg] && max_partial_reg != hint_reg)) {
-        max_partial_reg = i;
-      }
-    }
-  }
-
-  if (min_full_reg != any_reg) {
-    return min_full_reg;
-  } else if (max_partial_reg != any_reg) {
-    *need_split = true;
-    return max_partial_reg;
-  } else {
-    return any_reg;
-  }
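+	// prefer a register that is free for the whole interval; otherwise return the register
+	// that stays free the longest and signal the caller that a split is needed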
+	int min_full_reg = any_reg;
+	int max_partial_reg = any_reg;
+
+	for (int i = _first_reg; i <= _last_reg; i++) {
+		if (i == ignore_reg) {
+			// this register must be ignored
+
+		} else if (_use_pos[i] >= interval_to) {
+			// this register is free for the full interval
+			if (min_full_reg == any_reg || i == hint_reg || (_use_pos[i] < _use_pos[min_full_reg] && min_full_reg != hint_reg)) {
+				min_full_reg = i;
+			}
+		} else if (_use_pos[i] > reg_needed_until) {
+			// this register is at least free until reg_needed_until
+			if (max_partial_reg == any_reg || i == hint_reg || (_use_pos[i] > _use_pos[max_partial_reg] && max_partial_reg != hint_reg)) {
+				max_partial_reg = i;
+			}
+		}
+	}
+
+	if (min_full_reg != any_reg) {
+		return min_full_reg;
+	} else if (max_partial_reg != any_reg) {
+		*need_split = true;
+		return max_partial_reg;
+	} else {
+		return any_reg;
+	}
 }
 
 int LinearScanWalker::find_free_double_reg(int reg_needed_until, int interval_to, int hint_reg, bool* need_split) {
-  assert((_last_reg - _first_reg + 1) % 2 == 0, "adjust algorithm");
-
-  int min_full_reg = any_reg;
-  int max_partial_reg = any_reg;
-
-  for (int i = _first_reg; i < _last_reg; i+=2) {
-    if (_use_pos[i] >= interval_to && _use_pos[i + 1] >= interval_to) {
-      // this register is free for the full interval
-      if (min_full_reg == any_reg || i == hint_reg || (_use_pos[i] < _use_pos[min_full_reg] && min_full_reg != hint_reg)) {
-        min_full_reg = i;
-      }
-    } else if (_use_pos[i] > reg_needed_until && _use_pos[i + 1] > reg_needed_until) {
-      // this register is at least free until reg_needed_until
-      if (max_partial_reg == any_reg || i == hint_reg || (_use_pos[i] > _use_pos[max_partial_reg] && max_partial_reg != hint_reg)) {
-        max_partial_reg = i;
-      }
-    }
-  }
-
-  if (min_full_reg != any_reg) {
-    return min_full_reg;
-  } else if (max_partial_reg != any_reg) {
-    *need_split = true;
-    return max_partial_reg;
-  } else {
-    return any_reg;
-  }
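+	// like find_free_reg, but searches for an aligned pair of adjacent registers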
+	assert((_last_reg - _first_reg + 1) % 2 == 0, "adjust algorithm");
+
+	int min_full_reg = any_reg;
+	int max_partial_reg = any_reg;
+
+	for (int i = _first_reg; i < _last_reg; i+=2) {
+		if (_use_pos[i] >= interval_to && _use_pos[i + 1] >= interval_to) {
+			// this register is free for the full interval
+			if (min_full_reg == any_reg || i == hint_reg || (_use_pos[i] < _use_pos[min_full_reg] && min_full_reg != hint_reg)) {
+				min_full_reg = i;
+			}
+		} else if (_use_pos[i] > reg_needed_until && _use_pos[i + 1] > reg_needed_until) {
+			// this register is at least free until reg_needed_until
+			if (max_partial_reg == any_reg || i == hint_reg || (_use_pos[i] > _use_pos[max_partial_reg] && max_partial_reg != hint_reg)) {
+				max_partial_reg = i;
+			}
+		}
+	}
+
+	if (min_full_reg != any_reg) {
+		return min_full_reg;
+	} else if (max_partial_reg != any_reg) {
+		*need_split = true;
+		return max_partial_reg;
+	} else {
+		return any_reg;
+	}
 }
 
 
 bool LinearScanWalker::alloc_free_reg(Interval* cur) {
-  TRACE_LINEAR_SCAN(2, tty->print("trying to find free register for "); cur->print());
-
-  init_use_lists(true);
-  free_exclude_active_fixed();
-  free_exclude_active_any();
-  free_collect_inactive_fixed(cur);
-  free_collect_inactive_any(cur);
-//  free_collect_unhandled(fixedKind, cur);
-  assert(unhandled_first(fixedKind) == Interval::end(), "must not have unhandled fixed intervals because all fixed intervals have a use at position 0");
-
-  // _use_pos contains the start of the next interval that has this register assigned
-  // (either as a fixed register or a normal allocated register in the past)
-  // only intervals overlapping with cur are processed, non-overlapping invervals can be ignored safely
-  TRACE_LINEAR_SCAN(4, tty->print_cr("      state of registers:"));
-  TRACE_LINEAR_SCAN(4, for (int i = _first_reg; i <= _last_reg; i++) tty->print_cr("      reg %d: use_pos: %d", i, _use_pos[i]));
-
-  int hint_reg, hint_regHi;
-  Interval* register_hint = cur->register_hint();
-  if (register_hint != NULL) {
-    hint_reg = register_hint->assigned_reg();
-    hint_regHi = register_hint->assigned_regHi();
-
-    if (allocator()->is_precolored_cpu_interval(register_hint)) {
-      assert(hint_reg != any_reg && hint_regHi == any_reg, "must be for fixed intervals");
-      hint_regHi = hint_reg + 1;  // connect e.g. eax-edx
-    }
-    TRACE_LINEAR_SCAN(4, tty->print("      hint registers %d, %d from interval ", hint_reg, hint_regHi); register_hint->print());
-
-  } else {
-    hint_reg = any_reg;
-    hint_regHi = any_reg;
-  }
-  assert(hint_reg == any_reg || hint_reg != hint_regHi, "hint reg and regHi equal");
-  assert(cur->assigned_reg() == any_reg && cur->assigned_regHi() == any_reg, "register already assigned to interval");
-
-  // the register must be free at least until this position
-  int reg_needed_until = cur->from() + 1;
-  int interval_to = cur->to();
-
-  bool need_split = false;
-  int split_pos = -1;
-  int reg = any_reg;
-  int regHi = any_reg;
-
-  if (_adjacent_regs) {
-    reg = find_free_double_reg(reg_needed_until, interval_to, hint_reg, &need_split);
-    regHi = reg + 1;
-    if (reg == any_reg) {
-      return false;
-    }
-    split_pos = MIN2(_use_pos[reg], _use_pos[regHi]);
-
-  } else {
-    reg = find_free_reg(reg_needed_until, interval_to, hint_reg, any_reg, &need_split);
-    if (reg == any_reg) {
-      return false;
-    }
-    split_pos = _use_pos[reg];
-
-    if (_num_phys_regs == 2) {
-      regHi = find_free_reg(reg_needed_until, interval_to, hint_regHi, reg, &need_split);
-
-      if (_use_pos[reg] < interval_to && regHi == any_reg) {
-        // do not split interval if only one register can be assigned until the split pos
-        // (when one register is found for the whole interval, split&spill is only
-        // performed for the hi register)
-        return false;
-
-      } else if (regHi != any_reg) {
-        split_pos = MIN2(split_pos, _use_pos[regHi]);
-
-        // sort register numbers to prevent e.g. a move from eax,ebx to ebx,eax
-        if (reg > regHi) {
-          int temp = reg;
-          reg = regHi;
-          regHi = temp;
-        }
-      }
-    }
-  }
-
-  cur->assign_reg(reg, regHi);
-  TRACE_LINEAR_SCAN(2, tty->print_cr("selected register %d, %d", reg, regHi));
-
-  assert(split_pos > 0, "invalid split_pos");
-  if (need_split) {
-    // register not available for full interval, so split it
-    split_when_partial_register_available(cur, split_pos);
-  }
-
-  // only return true if interval is completely assigned
-  return _num_phys_regs == 1 || regHi != any_reg;
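+	// try to assign a register that is not blocked by another interval; the current interval
+	// may be split when a register is only partially free. Returns true only when the
+	// interval is completely assigned afterwards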
+	TRACE_LINEAR_SCAN(2, tty->print("trying to find free register for "); cur->print());
+
+	init_use_lists(true);
+	free_exclude_active_fixed();
+	free_exclude_active_any();
+	free_collect_inactive_fixed(cur);
+	free_collect_inactive_any(cur);
+	//  free_collect_unhandled(fixedKind, cur);
+	assert(unhandled_first(fixedKind) == Interval::end(), "must not have unhandled fixed intervals because all fixed intervals have a use at position 0");
+
+	// _use_pos contains the start of the next interval that has this register assigned
+	// (either as a fixed register or a normal allocated register in the past)
+	// only intervals overlapping with cur are processed, non-overlapping intervals can be ignored safely
+	TRACE_LINEAR_SCAN(4, tty->print_cr("      state of registers:"));
+	TRACE_LINEAR_SCAN(4, for (int i = _first_reg; i <= _last_reg; i++) tty->print_cr("      reg %d: use_pos: %d", i, _use_pos[i]));
+
+	int hint_reg, hint_regHi;
+	Interval* register_hint = cur->register_hint();
+	if (register_hint != NULL) {
+		hint_reg = register_hint->assigned_reg();
+		hint_regHi = register_hint->assigned_regHi();
+
+		if (allocator()->is_precolored_cpu_interval(register_hint)) {
+			assert(hint_reg != any_reg && hint_regHi == any_reg, "must be for fixed intervals");
+			hint_regHi = hint_reg + 1;  // connect e.g. eax-edx
+		}
+		TRACE_LINEAR_SCAN(4, tty->print("      hint registers %d, %d from interval ", hint_reg, hint_regHi); register_hint->print());
+
+	} else {
+		hint_reg = any_reg;
+		hint_regHi = any_reg;
+	}
+	assert(hint_reg == any_reg || hint_reg != hint_regHi, "hint reg and regHi equal");
+	assert(cur->assigned_reg() == any_reg && cur->assigned_regHi() == any_reg, "register already assigned to interval");
+
+	// the register must be free at least until this position
+	int reg_needed_until = cur->from() + 1;
+	int interval_to = cur->to();
+
+	bool need_split = false;
+	int split_pos = -1;
+	int reg = any_reg;
+	int regHi = any_reg;
+
+	if (_adjacent_regs) {
+		reg = find_free_double_reg(reg_needed_until, interval_to, hint_reg, &need_split);
+		regHi = reg + 1;
+		if (reg == any_reg) {
+			return false;
+		}
+		split_pos = MIN2(_use_pos[reg], _use_pos[regHi]);
+
+	} else {
+		reg = find_free_reg(reg_needed_until, interval_to, hint_reg, any_reg, &need_split);
+		if (reg == any_reg) {
+			return false;
+		}
+		split_pos = _use_pos[reg];
+
+		if (_num_phys_regs == 2) {
+			regHi = find_free_reg(reg_needed_until, interval_to, hint_regHi, reg, &need_split);
+
+			if (_use_pos[reg] < interval_to && regHi == any_reg) {
+				// do not split interval if only one register can be assigned until the split pos
+				// (when one register is found for the whole interval, split&spill is only
+				// performed for the hi register)
+				return false;
+
+			} else if (regHi != any_reg) {
+				split_pos = MIN2(split_pos, _use_pos[regHi]);
+
+				// sort register numbers to prevent e.g. a move from eax,ebx to ebx,eax
+				if (reg > regHi) {
+					int temp = reg;
+					reg = regHi;
+					regHi = temp;
+				}
+			}
+		}
+	}
+
+	cur->assign_reg(reg, regHi);
+	TRACE_LINEAR_SCAN(2, tty->print_cr("selected register %d, %d", reg, regHi));
+
+	assert(split_pos > 0, "invalid split_pos");
+	if (need_split) {
+		// register not available for full interval, so split it
+		split_when_partial_register_available(cur, split_pos);
+	}
+
+	// only return true if interval is completely assigned
+	return _num_phys_regs == 1 || regHi != any_reg;
 }
 
 
 int LinearScanWalker::find_locked_reg(int reg_needed_until, int interval_to, int hint_reg, int ignore_reg, bool* need_split) {
-  int max_reg = any_reg;
-
-  for (int i = _first_reg; i <= _last_reg; i++) {
-    if (i == ignore_reg) {
-      // this register must be ignored
-
-    } else if (_use_pos[i] > reg_needed_until) {
-      if (max_reg == any_reg || i == hint_reg || (_use_pos[i] > _use_pos[max_reg] && max_reg != hint_reg)) {
-        max_reg = i;
-      }
-    }
-  }
-
-  if (max_reg != any_reg && _block_pos[max_reg] <= interval_to) {
-    *need_split = true;
-  }
-
-  return max_reg;
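+	// choose the register whose next use position is farthest away (preferring hint_reg);
+	// the intervals currently occupying it can then be split and spilled.  *need_split is
+	// set when the chosen register is blocked before the end of the current interval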
+	int max_reg = any_reg;
+
+	for (int i = _first_reg; i <= _last_reg; i++) {
+		if (i == ignore_reg) {
+			// this register must be ignored
+
+		} else if (_use_pos[i] > reg_needed_until) {
+			if (max_reg == any_reg || i == hint_reg || (_use_pos[i] > _use_pos[max_reg] && max_reg != hint_reg)) {
+				max_reg = i;
+			}
+		}
+	}
+
+	if (max_reg != any_reg && _block_pos[max_reg] <= interval_to) {
+		*need_split = true;
+	}
+
+	return max_reg;
 }
 
 int LinearScanWalker::find_locked_double_reg(int reg_needed_until, int interval_to, int hint_reg, bool* need_split) {
-  assert((_last_reg - _first_reg + 1) % 2 == 0, "adjust algorithm");
-
-  int max_reg = any_reg;
-
-  for (int i = _first_reg; i < _last_reg; i+=2) {
-    if (_use_pos[i] > reg_needed_until && _use_pos[i + 1] > reg_needed_until) {
-      if (max_reg == any_reg || _use_pos[i] > _use_pos[max_reg]) {
-        max_reg = i;
-      }
-    }
-  }
-
-  if (_block_pos[max_reg] <= interval_to || _block_pos[max_reg + 1] <= interval_to) {
-    *need_split = true;
-  }
-
-  return max_reg;
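+	// like find_locked_reg, but selects an even-aligned pair of adjacent registers
+	// (reg and reg + 1) whose next use positions are both beyond reg_needed_until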
+	assert((_last_reg - _first_reg + 1) % 2 == 0, "adjust algorithm");
+
+	int max_reg = any_reg;
+
+	for (int i = _first_reg; i < _last_reg; i+=2) {
+		if (_use_pos[i] > reg_needed_until && _use_pos[i + 1] > reg_needed_until) {
+			if (max_reg == any_reg || _use_pos[i] > _use_pos[max_reg]) {
+				max_reg = i;
+			}
+		}
+	}
+
+	if (_block_pos[max_reg] <= interval_to || _block_pos[max_reg + 1] <= interval_to) {
+		*need_split = true;
+	}
+
+	return max_reg;
 }
 
 void LinearScanWalker::split_and_spill_intersecting_intervals(int reg, int regHi) {
-  assert(reg != any_reg, "no register assigned");
-
-  for (int i = 0; i < _spill_intervals[reg]->length(); i++) {
-    Interval* it = _spill_intervals[reg]->at(i);
-    remove_from_list(it);
-    split_and_spill_interval(it);
-  }
-
-  if (regHi != any_reg) {
-    IntervalList* processed = _spill_intervals[reg];
-    for (int i = 0; i < _spill_intervals[regHi]->length(); i++) {
-      Interval* it = _spill_intervals[regHi]->at(i);
-      if (processed->index_of(it) == -1) {
-        remove_from_list(it);
-        split_and_spill_interval(it);
-      }
-    }
-  }
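+	// split and spill every interval collected in _spill_intervals for reg (and regHi)
+	// so that the register (pair) becomes available for the current interval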
+	assert(reg != any_reg, "no register assigned");
+
+	for (int i = 0; i < _spill_intervals[reg]->length(); i++) {
+		Interval* it = _spill_intervals[reg]->at(i);
+		remove_from_list(it);
+		split_and_spill_interval(it);
+	}
+
+	if (regHi != any_reg) {
+		IntervalList* processed = _spill_intervals[reg];
+		for (int i = 0; i < _spill_intervals[regHi]->length(); i++) {
+			Interval* it = _spill_intervals[regHi]->at(i);
+			if (processed->index_of(it) == -1) {
+				remove_from_list(it);
+				split_and_spill_interval(it);
+			}
+		}
+	}
 }
 
 
 // Split an Interval and spill it to memory so that cur can be placed in a register
 void LinearScanWalker::alloc_locked_reg(Interval* cur) {
-  TRACE_LINEAR_SCAN(2, tty->print("need to split and spill to get register for "); cur->print());
-
-  // collect current usage of registers
-  init_use_lists(false);
-  spill_exclude_active_fixed();
-//  spill_block_unhandled_fixed(cur);
-  assert(unhandled_first(fixedKind) == Interval::end(), "must not have unhandled fixed intervals because all fixed intervals have a use at position 0");
-  spill_block_inactive_fixed(cur);
-  spill_collect_active_any();
-  spill_collect_inactive_any(cur);
+	TRACE_LINEAR_SCAN(2, tty->print("need to split and spill to get register for "); cur->print());
+
+	// collect current usage of registers
+	init_use_lists(false);
+	spill_exclude_active_fixed();
+	//  spill_block_unhandled_fixed(cur);
+	assert(unhandled_first(fixedKind) == Interval::end(), "must not have unhandled fixed intervals because all fixed intervals have a use at position 0");
+	spill_block_inactive_fixed(cur);
+	spill_collect_active_any();
+	spill_collect_inactive_any(cur);
 
 #ifndef PRODUCT
-  if (TraceLinearScanLevel >= 4) {
-    tty->print_cr("      state of registers:");
-    for (int i = _first_reg; i <= _last_reg; i++) {
-      tty->print("      reg %d: use_pos: %d, block_pos: %d, intervals: ", i, _use_pos[i], _block_pos[i]);
-      for (int j = 0; j < _spill_intervals[i]->length(); j++) {
-        tty->print("%d ", _spill_intervals[i]->at(j)->reg_num());
-      }
-      tty->cr();
-    }
-  }
+	if (TraceLinearScanLevel >= 4) {
+		tty->print_cr("      state of registers:");
+		for (int i = _first_reg; i <= _last_reg; i++) {
+			tty->print("      reg %d: use_pos: %d, block_pos: %d, intervals: ", i, _use_pos[i], _block_pos[i]);
+			for (int j = 0; j < _spill_intervals[i]->length(); j++) {
+				tty->print("%d ", _spill_intervals[i]->at(j)->reg_num());
+			}
+			tty->cr();
+		}
+	}
 #endif
 
-  // the register must be free at least until this position
-  int reg_needed_until = MIN2(cur->first_usage(mustHaveRegister), cur->from() + 1);
-  int interval_to = cur->to();
-  assert (reg_needed_until > 0 && reg_needed_until < max_jint, "interval has no use");
-
-  int split_pos = 0;
-  int use_pos = 0;
-  bool need_split = false;
-  int reg, regHi;
-
-  if (_adjacent_regs) {
-    reg = find_locked_double_reg(reg_needed_until, interval_to, any_reg, &need_split);
-    regHi = reg + 1;
-
-    if (reg != any_reg) {
-      use_pos = MIN2(_use_pos[reg], _use_pos[regHi]);
-      split_pos = MIN2(_block_pos[reg], _block_pos[regHi]);
-    }
-  } else {
-    reg = find_locked_reg(reg_needed_until, interval_to, any_reg, cur->assigned_reg(), &need_split);
-    regHi = any_reg;
-
-    if (reg != any_reg) {
-      use_pos = _use_pos[reg];
-      split_pos = _block_pos[reg];
-
-      if (_num_phys_regs == 2) {
-        if (cur->assigned_reg() != any_reg) {
-          regHi = reg;
-          reg = cur->assigned_reg();
-        } else {
-          regHi = find_locked_reg(reg_needed_until, interval_to, any_reg, reg, &need_split);
-          if (regHi != any_reg) {
-            use_pos = MIN2(use_pos, _use_pos[regHi]);
-            split_pos = MIN2(split_pos, _block_pos[regHi]);
-          }
-        }
-
-        if (regHi != any_reg && reg > regHi) {
-          // sort register numbers to prevent e.g. a move from eax,ebx to ebx,eax
-          int temp = reg;
-          reg = regHi;
-          regHi = temp;
-        }
-      }
-    }
-  }
-
-  if (reg == any_reg || (_num_phys_regs == 2 && regHi == any_reg) || use_pos <= cur->first_usage(mustHaveRegister)) {
-    // the first use of cur is later than the spilling position -> spill cur
-    TRACE_LINEAR_SCAN(4, tty->print_cr("able to spill current interval. first_usage(register): %d, use_pos: %d", cur->first_usage(mustHaveRegister), use_pos));
-
-    if (cur->first_usage(mustHaveRegister) <= cur->from() + 1) {
-      assert(false, "cannot spill interval that is used in first instruction (possible reason: no register found)");
-      // assign a reasonable register and do a bailout in product mode to avoid errors
-      allocator()->assign_spill_slot(cur);
-      BAILOUT("LinearScan: no register found");
-    }
-
-    split_and_spill_interval(cur);
-  } else {
-    TRACE_LINEAR_SCAN(4, tty->print_cr("decided to use register %d, %d", reg, regHi));
-    assert(reg != any_reg && (_num_phys_regs == 1 || regHi != any_reg), "no register found");
-    assert(split_pos > 0, "invalid split_pos");
-    assert(need_split == false || split_pos > cur->from(), "splitting interval at from");
-
-    cur->assign_reg(reg, regHi);
-    if (need_split) {
-      // register not available for full interval, so split it
-      split_when_partial_register_available(cur, split_pos);
-    }
-
-    // perform splitting and spilling for all affected intervalls
-    split_and_spill_intersecting_intervals(reg, regHi);
-  }
+	// the register must be free at least until this position
+	int reg_needed_until = MIN2(cur->first_usage(mustHaveRegister), cur->from() + 1);
+	int interval_to = cur->to();
+	assert (reg_needed_until > 0 && reg_needed_until < max_jint, "interval has no use");
+
+	int split_pos = 0;
+	int use_pos = 0;
+	bool need_split = false;
+	int reg, regHi;
+
+	if (_adjacent_regs) {
+		reg = find_locked_double_reg(reg_needed_until, interval_to, any_reg, &need_split);
+		regHi = reg + 1;
+
+		if (reg != any_reg) {
+			use_pos = MIN2(_use_pos[reg], _use_pos[regHi]);
+			split_pos = MIN2(_block_pos[reg], _block_pos[regHi]);
+		}
+	} else {
+		reg = find_locked_reg(reg_needed_until, interval_to, any_reg, cur->assigned_reg(), &need_split);
+		regHi = any_reg;
+
+		if (reg != any_reg) {
+			use_pos = _use_pos[reg];
+			split_pos = _block_pos[reg];
+
+			if (_num_phys_regs == 2) {
+				if (cur->assigned_reg() != any_reg) {
+					regHi = reg;
+					reg = cur->assigned_reg();
+				} else {
+					regHi = find_locked_reg(reg_needed_until, interval_to, any_reg, reg, &need_split);
+					if (regHi != any_reg) {
+						use_pos = MIN2(use_pos, _use_pos[regHi]);
+						split_pos = MIN2(split_pos, _block_pos[regHi]);
+					}
+				}
+
+				if (regHi != any_reg && reg > regHi) {
+					// sort register numbers to prevent e.g. a move from eax,ebx to ebx,eax
+					int temp = reg;
+					reg = regHi;
+					regHi = temp;
+				}
+			}
+		}
+	}
+
+	if (reg == any_reg || (_num_phys_regs == 2 && regHi == any_reg) || use_pos <= cur->first_usage(mustHaveRegister)) {
+		// the first use of cur is later than the spilling position -> spill cur
+		TRACE_LINEAR_SCAN(4, tty->print_cr("able to spill current interval. first_usage(register): %d, use_pos: %d", cur->first_usage(mustHaveRegister), use_pos));
+
+		if (cur->first_usage(mustHaveRegister) <= cur->from() + 1) {
+			assert(false, "cannot spill interval that is used in first instruction (possible reason: no register found)");
+			// assign a reasonable register and do a bailout in product mode to avoid errors
+			allocator()->assign_spill_slot(cur);
+			BAILOUT("LinearScan: no register found");
+		}
+
+		split_and_spill_interval(cur);
+	} else {
+		TRACE_LINEAR_SCAN(4, tty->print_cr("decided to use register %d, %d", reg, regHi));
+		assert(reg != any_reg && (_num_phys_regs == 1 || regHi != any_reg), "no register found");
+		assert(split_pos > 0, "invalid split_pos");
+		assert(need_split == false || split_pos > cur->from(), "splitting interval at from");
+
+		cur->assign_reg(reg, regHi);
+		if (need_split) {
+			// register not available for full interval, so split it
+			split_when_partial_register_available(cur, split_pos);
+		}
+
+		// perform splitting and spilling for all affected intervalls
+		// perform splitting and spilling for all affected intervals
+	}
 }
 
 bool LinearScanWalker::no_allocation_possible(Interval* cur) {
 #ifdef X86
-  // fast calculation of intervals that can never get a register because the
-  // the next instruction is a call that blocks all registers
-  // Note: this does not work if callee-saved registers are available (e.g. on Sparc)
-
-  // check if this interval is the result of a split operation
-  // (an interval got a register until this position)
-  int pos = cur->from();
-  if ((pos & 1) == 1) {
-    // the current instruction is a call that blocks all registers
-    if (pos < allocator()->max_lir_op_id() && allocator()->has_call(pos + 1)) {
-      TRACE_LINEAR_SCAN(4, tty->print_cr("      free register cannot be available because all registers blocked by following call"));
-
-      // safety check that there is really no register available
-      assert(alloc_free_reg(cur) == false, "found a register for this interval");
-      return true;
-    }
-
-  }
+	// fast calculation of intervals that can never get a register because the
+	// next instruction is a call that blocks all registers
+	// Note: this does not work if callee-saved registers are available (e.g. on Sparc)
+
+	// check if this interval is the result of a split operation
+	// (an interval got a register until this position)
+	int pos = cur->from();
+	if ((pos & 1) == 1) {
+		// the current instruction is a call that blocks all registers
+		if (pos < allocator()->max_lir_op_id() && allocator()->has_call(pos + 1)) {
+			TRACE_LINEAR_SCAN(4, tty->print_cr("      free register cannot be available because all registers blocked by following call"));
+
+			// safety check that there is really no register available
+			assert(alloc_free_reg(cur) == false, "found a register for this interval");
+			return true;
+		}
+
+	}
 #endif
-  return false;
+	return false;
 }
 
 void LinearScanWalker::init_vars_for_alloc(Interval* cur) {
-  BasicType type = cur->type();
-  _num_phys_regs = LinearScan::num_physical_regs(type);
-  _adjacent_regs = LinearScan::requires_adjacent_regs(type);
-
-  if (pd_init_regs_for_alloc(cur)) {
-    // the appropriate register range was selected.
-  } else if (type == T_FLOAT || type == T_DOUBLE) {
-    _first_reg = pd_first_fpu_reg;
-    _last_reg = pd_last_fpu_reg;
-  } else {
-    _first_reg = pd_first_cpu_reg;
-    _last_reg = pd_last_cpu_reg;
-  }
-
-  assert(0 <= _first_reg && _first_reg < LinearScan::nof_regs, "out of range");
-  assert(0 <= _last_reg && _last_reg < LinearScan::nof_regs, "out of range");
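+	// determine the register range (_first_reg .. _last_reg) and the number of physical
+	// registers required for the type of the current interval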
+	BasicType type = cur->type();
+	_num_phys_regs = LinearScan::num_physical_regs(type);
+	_adjacent_regs = LinearScan::requires_adjacent_regs(type);
+
+	if (pd_init_regs_for_alloc(cur)) {
+		// the appropriate register range was selected.
+	} else if (type == T_FLOAT || type == T_DOUBLE) {
+		_first_reg = pd_first_fpu_reg;
+		_last_reg = pd_last_fpu_reg;
+	} else {
+		_first_reg = pd_first_cpu_reg;
+		_last_reg = pd_last_cpu_reg;
+	}
+
+	assert(0 <= _first_reg && _first_reg < LinearScan::nof_regs, "out of range");
+	assert(0 <= _last_reg && _last_reg < LinearScan::nof_regs, "out of range");
 }
 
 
 bool LinearScanWalker::is_move(LIR_Op* op, Interval* from, Interval* to) {
-  if (op->code() != lir_move) {
-    return false;
-  }
-  assert(op->as_Op1() != NULL, "move must be LIR_Op1");
-
-  LIR_Opr in = ((LIR_Op1*)op)->in_opr();
-  LIR_Opr res = ((LIR_Op1*)op)->result_opr();
-  return in->is_virtual() && res->is_virtual() && in->vreg_number() == from->reg_num() && res->vreg_number() == to->reg_num();
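+	// returns true if op is a move between the virtual registers of intervals 'from' and 'to'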
+	if (op->code() != lir_move) {
+		return false;
+	}
+	assert(op->as_Op1() != NULL, "move must be LIR_Op1");
+
+	LIR_Opr in = ((LIR_Op1*)op)->in_opr();
+	LIR_Opr res = ((LIR_Op1*)op)->result_opr();
+	return in->is_virtual() && res->is_virtual() && in->vreg_number() == from->reg_num() && res->vreg_number() == to->reg_num();
 }
 
 // optimization (especially for phi functions of nested loops):
 // assign same spill slot to non-intersecting intervals
 void LinearScanWalker::combine_spilled_intervals(Interval* cur) {
-  if (cur->is_split_child()) {
-    // optimization is only suitable for split parents
-    return;
-  }
-
-  Interval* register_hint = cur->register_hint(false);
-  if (register_hint == NULL) {
-    // cur is not the target of a move, otherwise register_hint would be set
-    return;
-  }
-  assert(register_hint->is_split_parent(), "register hint must be split parent");
-
-  if (cur->spill_state() != noOptimization || register_hint->spill_state() != noOptimization) {
-    // combining the stack slots for intervals where spill move optimization is applied
-    // is not benefitial and would cause problems
-    return;
-  }
-
-  int begin_pos = cur->from();
-  int end_pos = cur->to();
-  if (end_pos > allocator()->max_lir_op_id() || (begin_pos & 1) != 0 || (end_pos & 1) != 0) {
-    // safety check that lir_op_with_id is allowed
-    return;
-  }
-
-  if (!is_move(allocator()->lir_op_with_id(begin_pos), register_hint, cur) || !is_move(allocator()->lir_op_with_id(end_pos), cur, register_hint)) {
-    // cur and register_hint are not connected with two moves
-    return;
-  }
-
-  Interval* begin_hint = register_hint->split_child_at_op_id(begin_pos, LIR_OpVisitState::inputMode);
-  Interval* end_hint = register_hint->split_child_at_op_id(end_pos, LIR_OpVisitState::outputMode);
-  if (begin_hint == end_hint || begin_hint->to() != begin_pos || end_hint->from() != end_pos) {
-    // register_hint must be split, otherwise the re-writing of use positions does not work
-    return;
-  }
-
-  assert(begin_hint->assigned_reg() != any_reg, "must have register assigned");
-  assert(end_hint->assigned_reg() == any_reg, "must not have register assigned");
-  assert(cur->first_usage(mustHaveRegister) == begin_pos, "must have use position at begin of interval because of move");
-  assert(end_hint->first_usage(mustHaveRegister) == end_pos, "must have use position at begin of interval because of move");
-
-  if (begin_hint->assigned_reg() < LinearScan::nof_regs) {
-    // register_hint is not spilled at begin_pos, so it would not be benefitial to immediately spill cur
-    return;
-  }
-  assert(register_hint->canonical_spill_slot() != -1, "must be set when part of interval was spilled");
-
-  // modify intervals such that cur gets the same stack slot as register_hint
-  // delete use positions to prevent the intervals to get a register at beginning
-  cur->set_canonical_spill_slot(register_hint->canonical_spill_slot());
-  cur->remove_first_use_pos();
-  end_hint->remove_first_use_pos();
+	if (cur->is_split_child()) {
+		// optimization is only suitable for split parents
+		return;
+	}
+
+	Interval* register_hint = cur->register_hint(false);
+	if (register_hint == NULL) {
+		// cur is not the target of a move, otherwise register_hint would be set
+		return;
+	}
+	assert(register_hint->is_split_parent(), "register hint must be split parent");
+
+	if (cur->spill_state() != noOptimization || register_hint->spill_state() != noOptimization) {
+		// combining the stack slots for intervals where spill move optimization is applied
+		// is not beneficial and would cause problems
+		return;
+	}
+
+	int begin_pos = cur->from();
+	int end_pos = cur->to();
+	if (end_pos > allocator()->max_lir_op_id() || (begin_pos & 1) != 0 || (end_pos & 1) != 0) {
+		// safety check that lir_op_with_id is allowed
+		return;
+	}
+
+	if (!is_move(allocator()->lir_op_with_id(begin_pos), register_hint, cur) || !is_move(allocator()->lir_op_with_id(end_pos), cur, register_hint)) {
+		// cur and register_hint are not connected with two moves
+		return;
+	}
+
+	Interval* begin_hint = register_hint->split_child_at_op_id(begin_pos, LIR_OpVisitState::inputMode);
+	Interval* end_hint = register_hint->split_child_at_op_id(end_pos, LIR_OpVisitState::outputMode);
+	if (begin_hint == end_hint || begin_hint->to() != begin_pos || end_hint->from() != end_pos) {
+		// register_hint must be split, otherwise the re-writing of use positions does not work
+		return;
+	}
+
+	assert(begin_hint->assigned_reg() != any_reg, "must have register assigned");
+	assert(end_hint->assigned_reg() == any_reg, "must not have register assigned");
+	assert(cur->first_usage(mustHaveRegister) == begin_pos, "must have use position at begin of interval because of move");
+	assert(end_hint->first_usage(mustHaveRegister) == end_pos, "must have use position at begin of interval because of move");
+
+	if (begin_hint->assigned_reg() < LinearScan::nof_regs) {
+		// register_hint is not spilled at begin_pos, so it would not be beneficial to immediately spill cur
+		return;
+	}
+	assert(register_hint->canonical_spill_slot() != -1, "must be set when part of interval was spilled");
+
+	// modify intervals such that cur gets the same stack slot as register_hint
+	// delete use positions to prevent the intervals from getting a register at the beginning
+	cur->set_canonical_spill_slot(register_hint->canonical_spill_slot());
+	cur->remove_first_use_pos();
+	end_hint->remove_first_use_pos();
 }
 
 
 // allocate a physical register or memory location to an interval
 bool LinearScanWalker::activate_current() {
-  Interval* cur = current();
-  bool result = true;
-
-  TRACE_LINEAR_SCAN(2, tty->print   ("+++++ activating interval "); cur->print());
-  TRACE_LINEAR_SCAN(4, tty->print_cr("      split_parent: %d, insert_move_when_activated: %d", cur->split_parent()->reg_num(), cur->insert_move_when_activated()));
-
-  if (cur->assigned_reg() >= LinearScan::nof_regs) {
-    // activating an interval that has a stack slot assigned -> split it at first use position
-    // used for method parameters
-    TRACE_LINEAR_SCAN(4, tty->print_cr("      interval has spill slot assigned (method parameter) -> split it before first use"));
-
-    split_stack_interval(cur);
-    result = false;
-
-  } else if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::must_start_in_memory)) {
-    // activating an interval that must start in a stack slot, but may get a register later
-    // used for lir_roundfp: rounding is done by store to stack and reload later
-    TRACE_LINEAR_SCAN(4, tty->print_cr("      interval must start in stack slot -> split it before first use"));
-    assert(cur->assigned_reg() == any_reg && cur->assigned_regHi() == any_reg, "register already assigned");
-
-    allocator()->assign_spill_slot(cur);
-    split_stack_interval(cur);
-    result = false;
-
-  } else if (cur->assigned_reg() == any_reg) {
-    // interval has not assigned register -> normal allocation
-    // (this is the normal case for most intervals)
-    TRACE_LINEAR_SCAN(4, tty->print_cr("      normal allocation of register"));
-
-    // assign same spill slot to non-intersecting intervals
-    combine_spilled_intervals(cur);
-
-    init_vars_for_alloc(cur);
-    if (no_allocation_possible(cur) || !alloc_free_reg(cur)) {
-      // no empty register available.
-      // split and spill another interval so that this interval gets a register
-      alloc_locked_reg(cur);
-    }
-
-    // spilled intervals need not be move to active-list
-    if (cur->assigned_reg() >= LinearScan::nof_regs) {
-      result = false;
-    }
-  }
-
-  // load spilled values that become active from stack slot to register
-  if (cur->insert_move_when_activated()) {
-    assert(cur->is_split_child(), "must be");
-    assert(cur->current_split_child() != NULL, "must be");
-    assert(cur->current_split_child()->reg_num() != cur->reg_num(), "cannot insert move between same interval");
-    TRACE_LINEAR_SCAN(4, tty->print_cr("Inserting move from interval %d to %d because insert_move_when_activated is set", cur->current_split_child()->reg_num(), cur->reg_num()));
-
-    insert_move(cur->from(), cur->current_split_child(), cur);
-  }
-  cur->make_current_split_child();
-
-  return result; // true = interval is moved to active list
+	Interval* cur = current();
+	bool result = true;
+
+	TRACE_LINEAR_SCAN(2, tty->print   ("+++++ activating interval "); cur->print());
+	TRACE_LINEAR_SCAN(4, tty->print_cr("      split_parent: %d, insert_move_when_activated: %d", cur->split_parent()->reg_num(), cur->insert_move_when_activated()));
+
+	if (cur->assigned_reg() >= LinearScan::nof_regs) {
+		// activating an interval that has a stack slot assigned -> split it at first use position
+		// used for method parameters
+		TRACE_LINEAR_SCAN(4, tty->print_cr("      interval has spill slot assigned (method parameter) -> split it before first use"));
+
+		split_stack_interval(cur);
+		result = false;
+
+	} else if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::must_start_in_memory)) {
+		// activating an interval that must start in a stack slot, but may get a register later
+		// used for lir_roundfp: rounding is done by store to stack and reload later
+		TRACE_LINEAR_SCAN(4, tty->print_cr("      interval must start in stack slot -> split it before first use"));
+		assert(cur->assigned_reg() == any_reg && cur->assigned_regHi() == any_reg, "register already assigned");
+
+		allocator()->assign_spill_slot(cur);
+		split_stack_interval(cur);
+		result = false;
+
+	} else if (cur->assigned_reg() == any_reg) {
+		// interval has no register assigned -> normal allocation
+		// (this is the normal case for most intervals)
+		TRACE_LINEAR_SCAN(4, tty->print_cr("      normal allocation of register"));
+
+		// assign same spill slot to non-intersecting intervals
+		combine_spilled_intervals(cur);
+
+		init_vars_for_alloc(cur);
+		if (no_allocation_possible(cur) || !alloc_free_reg(cur)) {
+			// no free register available.
+			// split and spill another interval so that this interval gets a register
+			alloc_locked_reg(cur);
+		}
+
+		// spilled intervals need not be moved to the active list
+		if (cur->assigned_reg() >= LinearScan::nof_regs) {
+			result = false;
+		}
+	}
+
+	// load spilled values that become active from stack slot to register
+	if (cur->insert_move_when_activated()) {
+		assert(cur->is_split_child(), "must be");
+		assert(cur->current_split_child() != NULL, "must be");
+		assert(cur->current_split_child()->reg_num() != cur->reg_num(), "cannot insert move between same interval");
+		TRACE_LINEAR_SCAN(4, tty->print_cr("Inserting move from interval %d to %d because insert_move_when_activated is set", cur->current_split_child()->reg_num(), cur->reg_num()));
+
+		insert_move(cur->from(), cur->current_split_child(), cur);
+	}
+	cur->make_current_split_child();
+
+	return result; // true = interval is moved to active list
 }
 
 
 // Implementation of EdgeMoveOptimizer
 
 EdgeMoveOptimizer::EdgeMoveOptimizer() :
-  _edge_instructions(4),
-  _edge_instructions_idx(4)
+	_edge_instructions(4),
+	_edge_instructions_idx(4)
 {
 }
 
 void EdgeMoveOptimizer::optimize(BlockList* code) {
-  EdgeMoveOptimizer optimizer = EdgeMoveOptimizer();
-
-  // ignore the first block in the list (index 0 is not processed)
-  for (int i = code->length() - 1; i >= 1; i--) {
-    BlockBegin* block = code->at(i);
-
-    if (block->number_of_preds() > 1 && !block->is_set(BlockBegin::exception_entry_flag)) {
-      optimizer.optimize_moves_at_block_end(block);
-    }
-    if (block->number_of_sux() == 2) {
-      optimizer.optimize_moves_at_block_begin(block);
-    }
-  }
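+	// instructions that are identical at the end of all predecessors of a block are moved
+	// into the beginning of that block; instructions that are identical at the beginning
+	// of both successors of a block are moved to the end of that block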
+	EdgeMoveOptimizer optimizer = EdgeMoveOptimizer();
+
+	// ignore the first block in the list (index 0 is not processed)
+	for (int i = code->length() - 1; i >= 1; i--) {
+		BlockBegin* block = code->at(i);
+
+		if (block->number_of_preds() > 1 && !block->is_set(BlockBegin::exception_entry_flag)) {
+			optimizer.optimize_moves_at_block_end(block);
+		}
+		if (block->number_of_sux() == 2) {
+			optimizer.optimize_moves_at_block_begin(block);
+		}
+	}
 }
 
 
 // clear all internal data structures
 void EdgeMoveOptimizer::init_instructions() {
-  _edge_instructions.clear();
-  _edge_instructions_idx.clear();
+	_edge_instructions.clear();
+	_edge_instructions_idx.clear();
 }
 
 // append a lir-instruction-list and the index of the current operation in to the list
 void EdgeMoveOptimizer::append_instructions(LIR_OpList* instructions, int instructions_idx) {
-  _edge_instructions.append(instructions);
-  _edge_instructions_idx.append(instructions_idx);
+	_edge_instructions.append(instructions);
+	_edge_instructions_idx.append(instructions_idx);
 }
 
 // return the current operation of the given edge (predecessor or successor)
 LIR_Op* EdgeMoveOptimizer::instruction_at(int edge) {
-  LIR_OpList* instructions = _edge_instructions.at(edge);
-  int idx = _edge_instructions_idx.at(edge);
-
-  if (idx < instructions->length()) {
-    return instructions->at(idx);
-  } else {
-    return NULL;
-  }
+	LIR_OpList* instructions = _edge_instructions.at(edge);
+	int idx = _edge_instructions_idx.at(edge);
+
+	if (idx < instructions->length()) {
+		return instructions->at(idx);
+	} else {
+		return NULL;
+	}
 }
 
 // removes the current operation of the given edge (predecessor or successor)
 void EdgeMoveOptimizer::remove_cur_instruction(int edge, bool decrement_index) {
-  LIR_OpList* instructions = _edge_instructions.at(edge);
-  int idx = _edge_instructions_idx.at(edge);
-  instructions->remove_at(idx);
-
-  if (decrement_index) {
-    _edge_instructions_idx.at_put(edge, idx - 1);
-  }
+	LIR_OpList* instructions = _edge_instructions.at(edge);
+	int idx = _edge_instructions_idx.at(edge);
+	instructions->remove_at(idx);
+
+	if (decrement_index) {
+		_edge_instructions_idx.at_put(edge, idx - 1);
+	}
 }
 
 
 bool EdgeMoveOptimizer::operations_different(LIR_Op* op1, LIR_Op* op2) {
-  if (op1 == NULL || op2 == NULL) {
-    // at least one block is already empty -> no optimization possible
-    return true;
-  }
-
-  if (op1->code() == lir_move && op2->code() == lir_move) {
-    assert(op1->as_Op1() != NULL, "move must be LIR_Op1");
-    assert(op2->as_Op1() != NULL, "move must be LIR_Op1");
-    LIR_Op1* move1 = (LIR_Op1*)op1;
-    LIR_Op1* move2 = (LIR_Op1*)op2;
-    if (move1->info() == move2->info() && move1->in_opr() == move2->in_opr() && move1->result_opr() == move2->result_opr()) {
-      // these moves are exactly equal and can be optimized
-      return false;
-    }
-
-  } else if (op1->code() == lir_fxch && op2->code() == lir_fxch) {
-    assert(op1->as_Op1() != NULL, "fxch must be LIR_Op1");
-    assert(op2->as_Op1() != NULL, "fxch must be LIR_Op1");
-    LIR_Op1* fxch1 = (LIR_Op1*)op1;
-    LIR_Op1* fxch2 = (LIR_Op1*)op2;
-    if (fxch1->in_opr()->as_jint() == fxch2->in_opr()->as_jint()) {
-      // equal FPU stack operations can be optimized
-      return false;
-    }
-
-  } else if (op1->code() == lir_fpop_raw && op2->code() == lir_fpop_raw) {
-    // equal FPU stack operations can be optimized
-    return false;
-  }
-
-  // no optimization possible
-  return true;
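+	// returns false ("not different") only for operations that are provably identical and
+	// can therefore be merged: equal moves and equal FPU stack operations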
+	if (op1 == NULL || op2 == NULL) {
+		// at least one block is already empty -> no optimization possible
+		return true;
+	}
+
+	if (op1->code() == lir_move && op2->code() == lir_move) {
+		assert(op1->as_Op1() != NULL, "move must be LIR_Op1");
+		assert(op2->as_Op1() != NULL, "move must be LIR_Op1");
+		LIR_Op1* move1 = (LIR_Op1*)op1;
+		LIR_Op1* move2 = (LIR_Op1*)op2;
+		if (move1->info() == move2->info() && move1->in_opr() == move2->in_opr() && move1->result_opr() == move2->result_opr()) {
+			// these moves are exactly equal and can be optimized
+			return false;
+		}
+
+	} else if (op1->code() == lir_fxch && op2->code() == lir_fxch) {
+		assert(op1->as_Op1() != NULL, "fxch must be LIR_Op1");
+		assert(op2->as_Op1() != NULL, "fxch must be LIR_Op1");
+		LIR_Op1* fxch1 = (LIR_Op1*)op1;
+		LIR_Op1* fxch2 = (LIR_Op1*)op2;
+		if (fxch1->in_opr()->as_jint() == fxch2->in_opr()->as_jint()) {
+			// equal FPU stack operations can be optimized
+			return false;
+		}
+
+	} else if (op1->code() == lir_fpop_raw && op2->code() == lir_fpop_raw) {
+		// equal FPU stack operations can be optimized
+		return false;
+	}
+
+	// no optimization possible
+	return true;
 }
 
 void EdgeMoveOptimizer::optimize_moves_at_block_end(BlockBegin* block) {
-  TRACE_LINEAR_SCAN(4, tty->print_cr("optimizing moves at end of block B%d", block->block_id()));
-
-  if (block->is_predecessor(block)) {
-    // currently we can't handle this correctly.
-    return;
-  }
-
-  init_instructions();
-  int num_preds = block->number_of_preds();
-  assert(num_preds > 1, "do not call otherwise");
-  assert(!block->is_set(BlockBegin::exception_entry_flag), "exception handlers not allowed");
-
-  // setup a list with the lir-instructions of all predecessors
-  int i;
-  for (i = 0; i < num_preds; i++) {
-    BlockBegin* pred = block->pred_at(i);
-    LIR_OpList* pred_instructions = pred->lir()->instructions_list();
-
-    if (pred->number_of_sux() != 1) {
-      // this can happen with switch-statements where multiple edges are between
-      // the same blocks.
-      return;
-    }
-
-    assert(pred->number_of_sux() == 1, "can handle only one successor");
-    assert(pred->sux_at(0) == block, "invalid control flow");
-    assert(pred_instructions->last()->code() == lir_branch, "block with successor must end with branch");
-    assert(pred_instructions->last()->as_OpBranch() != NULL, "branch must be LIR_OpBranch");
-    assert(pred_instructions->last()->as_OpBranch()->cond() == lir_cond_always, "block must end with unconditional branch");
-
-    if (pred_instructions->last()->info() != NULL) {
-      // can not optimize instructions when debug info is needed
-      return;
-    }
-
-    // ignore the unconditional branch at the end of the block
-    append_instructions(pred_instructions, pred_instructions->length() - 2);
-  }
-
-
-  // process lir-instructions while all predecessors end with the same instruction
-  while (true) {
-    LIR_Op* op = instruction_at(0);
-    for (i = 1; i < num_preds; i++) {
-      if (operations_different(op, instruction_at(i))) {
-        // these instructions are different and cannot be optimized ->
-        // no further optimization possible
-        return;
-      }
-    }
-
-    TRACE_LINEAR_SCAN(4, tty->print("found instruction that is equal in all %d predecessors: ", num_preds); op->print());
-
-    // insert the instruction at the beginning of the current block
-    block->lir()->insert_before(1, op);
-
-    // delete the instruction at the end of all predecessors
-    for (i = 0; i < num_preds; i++) {
-      remove_cur_instruction(i, true);
-    }
-  }
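+	// instructions that occur identically at the end of every predecessor of 'block'
+	// are deleted in the predecessors and inserted once at the beginning of 'block'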
+	TRACE_LINEAR_SCAN(4, tty->print_cr("optimizing moves at end of block B%d", block->block_id()));
+
+	if (block->is_predecessor(block)) {
+		// currently we can't handle this correctly.
+		return;
+	}
+
+	init_instructions();
+	int num_preds = block->number_of_preds();
+	assert(num_preds > 1, "do not call otherwise");
+	assert(!block->is_set(BlockBegin::exception_entry_flag), "exception handlers not allowed");
+
+	// setup a list with the lir-instructions of all predecessors
+	int i;
+	for (i = 0; i < num_preds; i++) {
+		BlockBegin* pred = block->pred_at(i);
+		LIR_OpList* pred_instructions = pred->lir()->instructions_list();
+
+		if (pred->number_of_sux() != 1) {
+			// this can happen with switch statements where there are multiple edges
+			// between the same blocks.
+			return;
+		}
+
+		assert(pred->number_of_sux() == 1, "can handle only one successor");
+		assert(pred->sux_at(0) == block, "invalid control flow");
+		assert(pred_instructions->last()->code() == lir_branch, "block with successor must end with branch");
+		assert(pred_instructions->last()->as_OpBranch() != NULL, "branch must be LIR_OpBranch");
+		assert(pred_instructions->last()->as_OpBranch()->cond() == lir_cond_always, "block must end with unconditional branch");
+
+		if (pred_instructions->last()->info() != NULL) {
+			// can not optimize instructions when debug info is needed
+			return;
+		}
+
+		// ignore the unconditional branch at the end of the block
+		append_instructions(pred_instructions, pred_instructions->length() - 2);
+	}
+
+
+	// process lir-instructions while all predecessors end with the same instruction
+	while (true) {
+		LIR_Op* op = instruction_at(0);
+		for (i = 1; i < num_preds; i++) {
+			if (operations_different(op, instruction_at(i))) {
+				// these instructions are different and cannot be optimized ->
+				// no further optimization possible
+				return;
+			}
+		}
+
+		TRACE_LINEAR_SCAN(4, tty->print("found instruction that is equal in all %d predecessors: ", num_preds); op->print());
+
+		// insert the instruction at the beginning of the current block
+		block->lir()->insert_before(1, op);
+
+		// delete the instruction at the end of all predecessors
+		for (i = 0; i < num_preds; i++) {
+			remove_cur_instruction(i, true);
+		}
+	}
 }
 
 
 void EdgeMoveOptimizer::optimize_moves_at_block_begin(BlockBegin* block) {
-  TRACE_LINEAR_SCAN(4, tty->print_cr("optimization moves at begin of block B%d", block->block_id()));
-
-  init_instructions();
-  int num_sux = block->number_of_sux();
-
-  LIR_OpList* cur_instructions = block->lir()->instructions_list();
-
-  assert(num_sux == 2, "method should not be called otherwise");
-  assert(cur_instructions->last()->code() == lir_branch, "block with successor must end with branch");
-  assert(cur_instructions->last()->as_OpBranch() != NULL, "branch must be LIR_OpBranch");
-  assert(cur_instructions->last()->as_OpBranch()->cond() == lir_cond_always, "block must end with unconditional branch");
-
-  if (cur_instructions->last()->info() != NULL) {
-    // can no optimize instructions when debug info is needed
-    return;
-  }
-
-  LIR_Op* branch = cur_instructions->at(cur_instructions->length() - 2);
-  if (branch->info() != NULL || (branch->code() != lir_branch && branch->code() != lir_cond_float_branch)) {
-    // not a valid case for optimization
-    // currently, only blocks that end with two branches (conditional branch followed
-    // by unconditional branch) are optimized
-    return;
-  }
-
-  // now it is guaranteed that the block ends with two branch instructions.
-  // the instructions are inserted at the end of the block before these two branches
-  int insert_idx = cur_instructions->length() - 2;
-
-  int i;
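+	// instructions that occur identically at the beginning of both successors of 'block'
+	// are deleted there and inserted once at the end of 'block', before its two branches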
+	TRACE_LINEAR_SCAN(4, tty->print_cr("optimizing moves at beginning of block B%d", block->block_id()));
+
+	init_instructions();
+	int num_sux = block->number_of_sux();
+
+	LIR_OpList* cur_instructions = block->lir()->instructions_list();
+
+	assert(num_sux == 2, "method should not be called otherwise");
+	assert(cur_instructions->last()->code() == lir_branch, "block with successor must end with branch");
+	assert(cur_instructions->last()->as_OpBranch() != NULL, "branch must be LIR_OpBranch");
+	assert(cur_instructions->last()->as_OpBranch()->cond() == lir_cond_always, "block must end with unconditional branch");
+
+	if (cur_instructions->last()->info() != NULL) {
+		// cannot optimize instructions when debug info is needed
+		return;
+	}
+
+	LIR_Op* branch = cur_instructions->at(cur_instructions->length() - 2);
+	if (branch->info() != NULL || (branch->code() != lir_branch && branch->code() != lir_cond_float_branch)) {
+		// not a valid case for optimization
+		// currently, only blocks that end with two branches (conditional branch followed
+		// by unconditional branch) are optimized
+		return;
+	}
+
+	// now it is guaranteed that the block ends with two branch instructions.
+	// the instructions are inserted at the end of the block before these two branches
+	int insert_idx = cur_instructions->length() - 2;
+
+	int i;
 #ifdef ASSERT
-  for (i = insert_idx - 1; i >= 0; i--) {
-    LIR_Op* op = cur_instructions->at(i);
-    if ((op->code() == lir_branch || op->code() == lir_cond_float_branch) && ((LIR_OpBranch*)op)->block() != NULL) {
-      assert(false, "block with two successors can have only two branch instructions");
-    }
-  }
+	for (i = insert_idx - 1; i >= 0; i--) {
+		LIR_Op* op = cur_instructions->at(i);
+		if ((op->code() == lir_branch || op->code() == lir_cond_float_branch) && ((LIR_OpBranch*)op)->block() != NULL) {
+			assert(false, "block with two successors can have only two branch instructions");
+		}
+	}
 #endif
 
-  // setup a list with the lir-instructions of all successors
-  for (i = 0; i < num_sux; i++) {
-    BlockBegin* sux = block->sux_at(i);
-    LIR_OpList* sux_instructions = sux->lir()->instructions_list();
-
-    assert(sux_instructions->at(0)->code() == lir_label, "block must start with label");
-
-    if (sux->number_of_preds() != 1) {
-      // this can happen with switch-statements where multiple edges are between
-      // the same blocks.
-      return;
-    }
-    assert(sux->pred_at(0) == block, "invalid control flow");
-    assert(!sux->is_set(BlockBegin::exception_entry_flag), "exception handlers not allowed");
-
-    // ignore the label at the beginning of the block
-    append_instructions(sux_instructions, 1);
-  }
-
-  // process lir-instructions while all successors begin with the same instruction
-  while (true) {
-    LIR_Op* op = instruction_at(0);
-    for (i = 1; i < num_sux; i++) {
-      if (operations_different(op, instruction_at(i))) {
-        // these instructions are different and cannot be optimized ->
-        // no further optimization possible
-        return;
-      }
-    }
-
-    TRACE_LINEAR_SCAN(4, tty->print("----- found instruction that is equal in all %d successors: ", num_sux); op->print());
-
-    // insert instruction at end of current block
-    block->lir()->insert_before(insert_idx, op);
-    insert_idx++;
-
-    // delete the instructions at the beginning of all successors
-    for (i = 0; i < num_sux; i++) {
-      remove_cur_instruction(i, false);
-    }
-  }
+	// setup a list with the lir-instructions of all successors
+	for (i = 0; i < num_sux; i++) {
+		BlockBegin* sux = block->sux_at(i);
+		LIR_OpList* sux_instructions = sux->lir()->instructions_list();
+
+		assert(sux_instructions->at(0)->code() == lir_label, "block must start with label");
+
+		if (sux->number_of_preds() != 1) {
+			// this can happen with switch statements where there are multiple edges
+			// between the same blocks.
+			return;
+		}
+		assert(sux->pred_at(0) == block, "invalid control flow");
+		assert(!sux->is_set(BlockBegin::exception_entry_flag), "exception handlers not allowed");
+
+		// ignore the label at the beginning of the block
+		append_instructions(sux_instructions, 1);
+	}
+
+	// process lir-instructions while all successors begin with the same instruction
+	while (true) {
+		LIR_Op* op = instruction_at(0);
+		for (i = 1; i < num_sux; i++) {
+			if (operations_different(op, instruction_at(i))) {
+				// these instructions are different and cannot be optimized ->
+				// no further optimization possible
+				return;
+			}
+		}
+
+		TRACE_LINEAR_SCAN(4, tty->print("----- found instruction that is equal in all %d successors: ", num_sux); op->print());
+
+		// insert instruction at end of current block
+		block->lir()->insert_before(insert_idx, op);
+		insert_idx++;
+
+		// delete the instructions at the beginning of all successors
+		for (i = 0; i < num_sux; i++) {
+			remove_cur_instruction(i, false);
+		}
+	}
 }
 
 
 // Implementation of ControlFlowOptimizer
 
 ControlFlowOptimizer::ControlFlowOptimizer() :
-  _original_preds(4)
+	_original_preds(4)
 {
 }
 
 void ControlFlowOptimizer::optimize(BlockList* code) {
-  ControlFlowOptimizer optimizer = ControlFlowOptimizer();
-
-  // push the OSR entry block to the end so that we're not jumping over it.
-  BlockBegin* osr_entry = code->at(0)->end()->as_Base()->osr_entry();
-  if (osr_entry) {
-    int index = osr_entry->linear_scan_number();
-    assert(code->at(index) == osr_entry, "wrong index");
-    code->remove_at(index);
-    code->append(osr_entry);
-  }
-
-  optimizer.reorder_short_loops(code);
-  optimizer.delete_empty_blocks(code);
-  optimizer.delete_unnecessary_jumps(code);
-  optimizer.delete_jumps_to_return(code);
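+	// entry point for all control flow optimizations: reorder short loops, delete empty
+	// blocks, delete unnecessary jumps and delete jumps to return blocks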
+	ControlFlowOptimizer optimizer = ControlFlowOptimizer();
+
+	// push the OSR entry block to the end so that we're not jumping over it.
+	BlockBegin* osr_entry = code->at(0)->end()->as_Base()->osr_entry();
+	if (osr_entry) {
+		int index = osr_entry->linear_scan_number();
+		assert(code->at(index) == osr_entry, "wrong index");
+		code->remove_at(index);
+		code->append(osr_entry);
+	}
+
+	optimizer.reorder_short_loops(code);
+	optimizer.delete_empty_blocks(code);
+	optimizer.delete_unnecessary_jumps(code);
+	optimizer.delete_jumps_to_return(code);
 }
 
 void ControlFlowOptimizer::reorder_short_loop(BlockList* code, BlockBegin* header_block, int header_idx) {
-  int i = header_idx + 1;
-  int max_end = MIN2(header_idx + ShortLoopSize, code->length());
-  while (i < max_end && code->at(i)->loop_depth() >= header_block->loop_depth()) {
-    i++;
-  }
-
-  if (i == code->length() || code->at(i)->loop_depth() < header_block->loop_depth()) {
-    int end_idx = i - 1;
-    BlockBegin* end_block = code->at(end_idx);
-
-    if (end_block->number_of_sux() == 1 && end_block->sux_at(0) == header_block) {
-      // short loop from header_idx to end_idx found -> reorder blocks such that
-      // the header_block is the last block instead of the first block of the loop
-      TRACE_LINEAR_SCAN(1, tty->print_cr("Reordering short loop: length %d, header B%d, end B%d",
-                                         end_idx - header_idx + 1,
-                                         header_block->block_id(), end_block->block_id()));
-
-      for (int j = header_idx; j < end_idx; j++) {
-        code->at_put(j, code->at(j + 1));
-      }
-      code->at_put(end_idx, header_block);
-
-      // correct the flags so that any loop alignment occurs in the right place.
-      assert(code->at(end_idx)->is_set(BlockBegin::backward_branch_target_flag), "must be backward branch target");
-      code->at(end_idx)->clear(BlockBegin::backward_branch_target_flag);
-      code->at(header_idx)->set(BlockBegin::backward_branch_target_flag);
-    }
-  }
+	int i = header_idx + 1;
+	int max_end = MIN2(header_idx + ShortLoopSize, code->length());
+	while (i < max_end && code->at(i)->loop_depth() >= header_block->loop_depth()) {
+		i++;
+	}
+
+	if (i == code->length() || code->at(i)->loop_depth() < header_block->loop_depth()) {
+		int end_idx = i - 1;
+		BlockBegin* end_block = code->at(end_idx);
+
+		if (end_block->number_of_sux() == 1 && end_block->sux_at(0) == header_block) {
+			// short loop from header_idx to end_idx found -> reorder blocks such that
+			// the header_block is the last block instead of the first block of the loop
+			TRACE_LINEAR_SCAN(1, tty->print_cr("Reordering short loop: length %d, header B%d, end B%d",
+						end_idx - header_idx + 1,
+						header_block->block_id(), end_block->block_id()));
+
+			for (int j = header_idx; j < end_idx; j++) {
+				code->at_put(j, code->at(j + 1));
+			}
+			code->at_put(end_idx, header_block);
+
+			// correct the flags so that any loop alignment occurs in the right place.
+			assert(code->at(end_idx)->is_set(BlockBegin::backward_branch_target_flag), "must be backward branch target");
+			code->at(end_idx)->clear(BlockBegin::backward_branch_target_flag);
+			code->at(header_idx)->set(BlockBegin::backward_branch_target_flag);
+		}
+	}
 }
 
 void ControlFlowOptimizer::reorder_short_loops(BlockList* code) {
-  for (int i = code->length() - 1; i >= 0; i--) {
-    BlockBegin* block = code->at(i);
-
-    if (block->is_set(BlockBegin::linear_scan_loop_header_flag)) {
-      reorder_short_loop(code, block, i);
-    }
-  }
-
-  DEBUG_ONLY(verify(code));
+	for (int i = code->length() - 1; i >= 0; i--) {
+		BlockBegin* block = code->at(i);
+
+		if (block->is_set(BlockBegin::linear_scan_loop_header_flag)) {
+			reorder_short_loop(code, block, i);
+		}
+	}
+
+	DEBUG_ONLY(verify(code));
 }
 
 // only blocks with exactly one successor can be deleted. Such blocks
 // must always end with an unconditional branch to this successor
 bool ControlFlowOptimizer::can_delete_block(BlockBegin* block) {
-  if (block->number_of_sux() != 1 || block->number_of_exception_handlers() != 0 || block->is_entry_block()) {
-    return false;
-  }
-
-  LIR_OpList* instructions = block->lir()->instructions_list();
-
-  assert(instructions->length() >= 2, "block must have label and branch");
-  assert(instructions->at(0)->code() == lir_label, "first instruction must always be a label");
-  assert(instructions->last()->as_OpBranch() != NULL, "last instrcution must always be a branch");
-  assert(instructions->last()->as_OpBranch()->cond() == lir_cond_always, "branch must be unconditional");
-  assert(instructions->last()->as_OpBranch()->block() == block->sux_at(0), "branch target must be the successor");
-
-  // block must have exactly one successor
-
-  if (instructions->length() == 2 && instructions->last()->info() == NULL) {
-    return true;
-  }
-  return false;
+	if (block->number_of_sux() != 1 || block->number_of_exception_handlers() != 0 || block->is_entry_block()) {
+		return false;
+	}
+
+	LIR_OpList* instructions = block->lir()->instructions_list();
+
+	assert(instructions->length() >= 2, "block must have label and branch");
+	assert(instructions->at(0)->code() == lir_label, "first instruction must always be a label");
+	assert(instructions->last()->as_OpBranch() != NULL, "last instruction must always be a branch");
+	assert(instructions->last()->as_OpBranch()->cond() == lir_cond_always, "branch must be unconditional");
+	assert(instructions->last()->as_OpBranch()->block() == block->sux_at(0), "branch target must be the successor");
+
+	// block must have exactly one successor
+
+	if (instructions->length() == 2 && instructions->last()->info() == NULL) {
+		return true;
+	}
+	return false;
 }
 
 // substitute branch targets in all branch-instructions of this blocks
 void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegin* target_from, BlockBegin* target_to) {
-  TRACE_LINEAR_SCAN(3, tty->print_cr("Deleting empty block: substituting from B%d to B%d inside B%d", target_from->block_id(), target_to->block_id(), block->block_id()));
-
-  LIR_OpList* instructions = block->lir()->instructions_list();
-
-  assert(instructions->at(0)->code() == lir_label, "first instruction must always be a label");
-  for (int i = instructions->length() - 1; i >= 1; i--) {
-    LIR_Op* op = instructions->at(i);
-
-    if (op->code() == lir_branch || op->code() == lir_cond_float_branch) {
-      assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch");
-      LIR_OpBranch* branch = (LIR_OpBranch*)op;
-
-      if (branch->block() == target_from) {
-        branch->change_block(target_to);
-      }
-      if (branch->ublock() == target_from) {
-        branch->change_ublock(target_to);
-      }
-    }
-  }
+	TRACE_LINEAR_SCAN(3, tty->print_cr("Deleting empty block: substituting from B%d to B%d inside B%d", target_from->block_id(), target_to->block_id(), block->block_id()));
+
+	LIR_OpList* instructions = block->lir()->instructions_list();
+
+	assert(instructions->at(0)->code() == lir_label, "first instruction must always be a label");
+	for (int i = instructions->length() - 1; i >= 1; i--) {
+		LIR_Op* op = instructions->at(i);
+
+		if (op->code() == lir_branch || op->code() == lir_cond_float_branch) {
+			assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch");
+			LIR_OpBranch* branch = (LIR_OpBranch*)op;
+
+			if (branch->block() == target_from) {
+				branch->change_block(target_to);
+			}
+			if (branch->ublock() == target_from) {
+				branch->change_ublock(target_to);
+			}
+		}
+	}
 }
 
 void ControlFlowOptimizer::delete_empty_blocks(BlockList* code) {
-  int old_pos = 0;
-  int new_pos = 0;
-  int num_blocks = code->length();
-
-  while (old_pos < num_blocks) {
-    BlockBegin* block = code->at(old_pos);
-
-    if (can_delete_block(block)) {
-      BlockBegin* new_target = block->sux_at(0);
-
-      // propagate backward branch target flag for correct code alignment
-      if (block->is_set(BlockBegin::backward_branch_target_flag)) {
-        new_target->set(BlockBegin::backward_branch_target_flag);
-      }
-
-      // collect a list with all predecessors that contains each predecessor only once
-      // the predecessors of cur are changed during the substitution, so a copy of the
-      // predecessor list is necessary
-      int j;
-      _original_preds.clear();
-      for (j = block->number_of_preds() - 1; j >= 0; j--) {
-        BlockBegin* pred = block->pred_at(j);
-        if (_original_preds.index_of(pred) == -1) {
-          _original_preds.append(pred);
-        }
-      }
-
-      for (j = _original_preds.length() - 1; j >= 0; j--) {
-        BlockBegin* pred = _original_preds.at(j);
-        substitute_branch_target(pred, block, new_target);
-        pred->substitute_sux(block, new_target);
-      }
-    } else {
-      // adjust position of this block in the block list if blocks before
-      // have been deleted
-      if (new_pos != old_pos) {
-        code->at_put(new_pos, code->at(old_pos));
-      }
-      new_pos++;
-    }
-    old_pos++;
-  }
-  code->truncate(new_pos);
-
-  DEBUG_ONLY(verify(code));
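+	// remove all blocks that consist only of a label and an unconditional branch by
+	// letting their predecessors branch directly to the single successor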
+	int old_pos = 0;
+	int new_pos = 0;
+	int num_blocks = code->length();
+
+	while (old_pos < num_blocks) {
+		BlockBegin* block = code->at(old_pos);
+
+		if (can_delete_block(block)) {
+			BlockBegin* new_target = block->sux_at(0);
+
+			// propagate backward branch target flag for correct code alignment
+			if (block->is_set(BlockBegin::backward_branch_target_flag)) {
+				new_target->set(BlockBegin::backward_branch_target_flag);
+			}
+
+			// collect a list with all predecessors that contains each predecessor only once
+			// the predecessors of the block are changed during the substitution, so a copy of the
+			// predecessor list is necessary
+			int j;
+			_original_preds.clear();
+			for (j = block->number_of_preds() - 1; j >= 0; j--) {
+				BlockBegin* pred = block->pred_at(j);
+				if (_original_preds.index_of(pred) == -1) {
+					_original_preds.append(pred);
+				}
+			}
+
+			for (j = _original_preds.length() - 1; j >= 0; j--) {
+				BlockBegin* pred = _original_preds.at(j);
+				substitute_branch_target(pred, block, new_target);
+				pred->substitute_sux(block, new_target);
+			}
+		} else {
+			// adjust position of this block in the block list if blocks before
+			// have been deleted
+			if (new_pos != old_pos) {
+				code->at_put(new_pos, code->at(old_pos));
+			}
+			new_pos++;
+		}
+		old_pos++;
+	}
+	code->truncate(new_pos);
+
+	DEBUG_ONLY(verify(code));
 }
 
 void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
-  // skip the last block because there a branch is always necessary
-  for (int i = code->length() - 2; i >= 0; i--) {
-    BlockBegin* block = code->at(i);
-    LIR_OpList* instructions = block->lir()->instructions_list();
-
-    LIR_Op* last_op = instructions->last();
-    if (last_op->code() == lir_branch) {
-      assert(last_op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch");
-      LIR_OpBranch* last_branch = (LIR_OpBranch*)last_op;
-
-      assert(last_branch->block() != NULL, "last branch must always have a block as target");
-      assert(last_branch->label() == last_branch->block()->label(), "must be equal");
-
-      if (last_branch->info() == NULL) {
-        if (last_branch->block() == code->at(i + 1)) {
-
-          TRACE_LINEAR_SCAN(3, tty->print_cr("Deleting unconditional branch at end of block B%d", block->block_id()));
-
-          // delete last branch instruction
-          instructions->truncate(instructions->length() - 1);
-
-        } else {
-          LIR_Op* prev_op = instructions->at(instructions->length() - 2);
-          if (prev_op->code() == lir_branch || prev_op->code() == lir_cond_float_branch) {
-            assert(prev_op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch");
-            LIR_OpBranch* prev_branch = (LIR_OpBranch*)prev_op;
-
-            if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) {
-
-              TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id()));
-
-              // eliminate a conditional branch to the immediate successor
-              prev_branch->change_block(last_branch->block());
-              prev_branch->negate_cond();
-              instructions->truncate(instructions->length() - 1);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  DEBUG_ONLY(verify(code));
+	// skip the last block because there a branch is always necessary
+	for (int i = code->length() - 2; i >= 0; i--) {
+		BlockBegin* block = code->at(i);
+		LIR_OpList* instructions = block->lir()->instructions_list();
+
+		LIR_Op* last_op = instructions->last();
+		if (last_op->code() == lir_branch) {
+			assert(last_op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch");
+			LIR_OpBranch* last_branch = (LIR_OpBranch*)last_op;
+
+			assert(last_branch->block() != NULL, "last branch must always have a block as target");
+			assert(last_branch->label() == last_branch->block()->label(), "must be equal");
+
+			if (last_branch->info() == NULL) {
+				if (last_branch->block() == code->at(i + 1)) {
+
+					TRACE_LINEAR_SCAN(3, tty->print_cr("Deleting unconditional branch at end of block B%d", block->block_id()));
+
+					// delete last branch instruction
+					instructions->truncate(instructions->length() - 1);
+
+				} else {
+					LIR_Op* prev_op = instructions->at(instructions->length() - 2);
+					if (prev_op->code() == lir_branch || prev_op->code() == lir_cond_float_branch) {
+						assert(prev_op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch");
+						LIR_OpBranch* prev_branch = (LIR_OpBranch*)prev_op;
+
+						if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) {
+
+							TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id()));
+
+							// eliminate a conditional branch to the immediate successor
+							prev_branch->change_block(last_branch->block());
+							prev_branch->negate_cond();
+							instructions->truncate(instructions->length() - 1);
+						}
+					}
+				}
+			}
+		}
+	}
+
+	DEBUG_ONLY(verify(code));
 }
 
 void ControlFlowOptimizer::delete_jumps_to_return(BlockList* code) {
 #ifdef ASSERT
-  BitMap return_converted(BlockBegin::number_of_blocks());
-  return_converted.clear();
+	BitMap return_converted(BlockBegin::number_of_blocks());
+	return_converted.clear();
 #endif
 
-  for (int i = code->length() - 1; i >= 0; i--) {
-    BlockBegin* block = code->at(i);
-    LIR_OpList* cur_instructions = block->lir()->instructions_list();
-    LIR_Op*     cur_last_op = cur_instructions->last();
-
-    assert(cur_instructions->at(0)->code() == lir_label, "first instruction must always be a label");
-    if (cur_instructions->length() == 2 && cur_last_op->code() == lir_return) {
-      // the block contains only a label and a return
-      // if a predecessor ends with an unconditional jump to this block, then the jump
-      // can be replaced with a return instruction
-      //
-      // Note: the original block with only a return statement cannot be deleted completely
-      //       because the predecessors might have other (conditional) jumps to this block
-      //       -> this may lead to unnecesary return instructions in the final code
-
-      assert(cur_last_op->info() == NULL, "return instructions do not have debug information");
-      assert(block->number_of_sux() == 0 ||
-             (return_converted.at(block->block_id()) && block->number_of_sux() == 1),
-             "blocks that end with return must not have successors");
-
-      assert(cur_last_op->as_Op1() != NULL, "return must be LIR_Op1");
-      LIR_Opr return_opr = ((LIR_Op1*)cur_last_op)->in_opr();
-
-      for (int j = block->number_of_preds() - 1; j >= 0; j--) {
-        BlockBegin* pred = block->pred_at(j);
-        LIR_OpList* pred_instructions = pred->lir()->instructions_list();
-        LIR_Op*     pred_last_op = pred_instructions->last();
-
-        if (pred_last_op->code() == lir_branch) {
-          assert(pred_last_op->as_OpBranch() != NULL, "branch must be LIR_OpBranch");
-          LIR_OpBranch* pred_last_branch = (LIR_OpBranch*)pred_last_op;
-
-          if (pred_last_branch->block() == block && pred_last_branch->cond() == lir_cond_always && pred_last_branch->info() == NULL) {
-            // replace the jump to a return with a direct return
-            // Note: currently the edge between the blocks is not deleted
-            pred_instructions->at_put(pred_instructions->length() - 1, new LIR_Op1(lir_return, return_opr));
+	for (int i = code->length() - 1; i >= 0; i--) {
+		BlockBegin* block = code->at(i);
+		LIR_OpList* cur_instructions = block->lir()->instructions_list();
+		LIR_Op*     cur_last_op = cur_instructions->last();
+
+		assert(cur_instructions->at(0)->code() == lir_label, "first instruction must always be a label");
+		if (cur_instructions->length() == 2 && cur_last_op->code() == lir_return) {
+			// the block contains only a label and a return
+			// if a predecessor ends with an unconditional jump to this block, then the jump
+			// can be replaced with a return instruction
+			//
+			// Note: the original block with only a return statement cannot be deleted completely
+			//       because the predecessors might have other (conditional) jumps to this block
+			//       -> this may lead to unnecessary return instructions in the final code
+
+			assert(cur_last_op->info() == NULL, "return instructions do not have debug information");
+			assert(block->number_of_sux() == 0 ||
+					(return_converted.at(block->block_id()) && block->number_of_sux() == 1),
+					"blocks that end with return must not have successors");
+
+			assert(cur_last_op->as_Op1() != NULL, "return must be LIR_Op1");
+			LIR_Opr return_opr = ((LIR_Op1*)cur_last_op)->in_opr();
+
+			for (int j = block->number_of_preds() - 1; j >= 0; j--) {
+				BlockBegin* pred = block->pred_at(j);
+				LIR_OpList* pred_instructions = pred->lir()->instructions_list();
+				LIR_Op*     pred_last_op = pred_instructions->last();
+
+				if (pred_last_op->code() == lir_branch) {
+					assert(pred_last_op->as_OpBranch() != NULL, "branch must be LIR_OpBranch");
+					LIR_OpBranch* pred_last_branch = (LIR_OpBranch*)pred_last_op;
+
+					if (pred_last_branch->block() == block && pred_last_branch->cond() == lir_cond_always && pred_last_branch->info() == NULL) {
+						// replace the jump to a return with a direct return
+						// Note: currently the edge between the blocks is not deleted
+						pred_instructions->at_put(pred_instructions->length() - 1, new LIR_Op1(lir_return, return_opr));
 #ifdef ASSERT
-            return_converted.set_bit(pred->block_id());
+						return_converted.set_bit(pred->block_id());
 #endif
-          }
-        }
-      }
-    }
-  }
+					}
+				}
+			}
+		}
+	}
 }
 
 
 #ifdef ASSERT
 void ControlFlowOptimizer::verify(BlockList* code) {
-  for (int i = 0; i < code->length(); i++) {
-    BlockBegin* block = code->at(i);
-    LIR_OpList* instructions = block->lir()->instructions_list();
-
-    int j;
-    for (j = 0; j < instructions->length(); j++) {
-      LIR_OpBranch* op_branch = instructions->at(j)->as_OpBranch();
-
-      if (op_branch != NULL) {
-        assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid");
-        assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid");
-      }
-    }
-
-    for (j = 0; j < block->number_of_sux() - 1; j++) {
-      BlockBegin* sux = block->sux_at(j);
-      assert(code->index_of(sux) != -1, "successor not valid");
-    }
-
-    for (j = 0; j < block->number_of_preds() - 1; j++) {
-      BlockBegin* pred = block->pred_at(j);
-      assert(code->index_of(pred) != -1, "successor not valid");
-    }
-  }
+	for (int i = 0; i < code->length(); i++) {
+		BlockBegin* block = code->at(i);
+		LIR_OpList* instructions = block->lir()->instructions_list();
+
+		int j;
+		for (j = 0; j < instructions->length(); j++) {
+			LIR_OpBranch* op_branch = instructions->at(j)->as_OpBranch();
+
+			if (op_branch != NULL) {
+				assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid");
+				assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid");
+			}
+		}
+
+		for (j = 0; j < block->number_of_sux() - 1; j++) {
+			BlockBegin* sux = block->sux_at(j);
+			assert(code->index_of(sux) != -1, "successor not valid");
+		}
+
+		for (j = 0; j < block->number_of_preds() - 1; j++) {
+			BlockBegin* pred = block->pred_at(j);
+			assert(code->index_of(pred) != -1, "successor not valid");
+		}
+	}
 }
 #endif
 
@@ -6165,379 +6170,379 @@
 // Implementation of LinearStatistic
 
 const char* LinearScanStatistic::counter_name(int counter_idx) {
-  switch (counter_idx) {
-    case counter_method:          return "compiled methods";
-    case counter_fpu_method:      return "methods using fpu";
-    case counter_loop_method:     return "methods with loops";
-    case counter_exception_method:return "methods with xhandler";
-
-    case counter_loop:            return "loops";
-    case counter_block:           return "blocks";
-    case counter_loop_block:      return "blocks inside loop";
-    case counter_exception_block: return "exception handler entries";
-    case counter_interval:        return "intervals";
-    case counter_fixed_interval:  return "fixed intervals";
-    case counter_range:           return "ranges";
-    case counter_fixed_range:     return "fixed ranges";
-    case counter_use_pos:         return "use positions";
-    case counter_fixed_use_pos:   return "fixed use positions";
-    case counter_spill_slots:     return "spill slots";
-
-    // counter for classes of lir instructions
-    case counter_instruction:     return "total instructions";
-    case counter_label:           return "labels";
-    case counter_entry:           return "method entries";
-    case counter_return:          return "method returns";
-    case counter_call:            return "method calls";
-    case counter_move:            return "moves";
-    case counter_cmp:             return "compare";
-    case counter_cond_branch:     return "conditional branches";
-    case counter_uncond_branch:   return "unconditional branches";
-    case counter_stub_branch:     return "branches to stub";
-    case counter_alu:             return "artithmetic + logic";
-    case counter_alloc:           return "allocations";
-    case counter_sync:            return "synchronisation";
-    case counter_throw:           return "throw";
-    case counter_unwind:          return "unwind";
-    case counter_typecheck:       return "type+null-checks";
-    case counter_fpu_stack:       return "fpu-stack";
-    case counter_misc_inst:       return "other instructions";
-    case counter_other_inst:      return "misc. instructions";
-
-    // counter for different types of moves
-    case counter_move_total:      return "total moves";
-    case counter_move_reg_reg:    return "register->register";
-    case counter_move_reg_stack:  return "register->stack";
-    case counter_move_stack_reg:  return "stack->register";
-    case counter_move_stack_stack:return "stack->stack";
-    case counter_move_reg_mem:    return "register->memory";
-    case counter_move_mem_reg:    return "memory->register";
-    case counter_move_const_any:  return "constant->any";
-
-    case blank_line_1:            return "";
-    case blank_line_2:            return "";
-
-    default: ShouldNotReachHere(); return "";
-  }
+	switch (counter_idx) {
+		case counter_method:          return "compiled methods";
+		case counter_fpu_method:      return "methods using fpu";
+		case counter_loop_method:     return "methods with loops";
+		case counter_exception_method:return "methods with xhandler";
+
+		case counter_loop:            return "loops";
+		case counter_block:           return "blocks";
+		case counter_loop_block:      return "blocks inside loop";
+		case counter_exception_block: return "exception handler entries";
+		case counter_interval:        return "intervals";
+		case counter_fixed_interval:  return "fixed intervals";
+		case counter_range:           return "ranges";
+		case counter_fixed_range:     return "fixed ranges";
+		case counter_use_pos:         return "use positions";
+		case counter_fixed_use_pos:   return "fixed use positions";
+		case counter_spill_slots:     return "spill slots";
+
+		// counter for classes of lir instructions
+		case counter_instruction:     return "total instructions";
+		case counter_label:           return "labels";
+		case counter_entry:           return "method entries";
+		case counter_return:          return "method returns";
+		case counter_call:            return "method calls";
+		case counter_move:            return "moves";
+		case counter_cmp:             return "compare";
+		case counter_cond_branch:     return "conditional branches";
+		case counter_uncond_branch:   return "unconditional branches";
+		case counter_stub_branch:     return "branches to stub";
+		case counter_alu:             return "arithmetic + logic";
+		case counter_alloc:           return "allocations";
+		case counter_sync:            return "synchronisation";
+		case counter_throw:           return "throw";
+		case counter_unwind:          return "unwind";
+		case counter_typecheck:       return "type+null-checks";
+		case counter_fpu_stack:       return "fpu-stack";
+		case counter_misc_inst:       return "other instructions";
+		case counter_other_inst:      return "misc. instructions";
+
+		// counter for different types of moves
+		case counter_move_total:      return "total moves";
+		case counter_move_reg_reg:    return "register->register";
+		case counter_move_reg_stack:  return "register->stack";
+		case counter_move_stack_reg:  return "stack->register";
+		case counter_move_stack_stack:return "stack->stack";
+		case counter_move_reg_mem:    return "register->memory";
+		case counter_move_mem_reg:    return "memory->register";
+		case counter_move_const_any:  return "constant->any";
+
+		case blank_line_1:            return "";
+		case blank_line_2:            return "";
+
+		default: ShouldNotReachHere(); return "";
+	}
 }
 
 LinearScanStatistic::Counter LinearScanStatistic::base_counter(int counter_idx) {
-  if (counter_idx == counter_fpu_method || counter_idx == counter_loop_method || counter_idx == counter_exception_method) {
-    return counter_method;
-  } else if (counter_idx == counter_loop_block || counter_idx == counter_exception_block) {
-    return counter_block;
-  } else if (counter_idx >= counter_instruction && counter_idx <= counter_other_inst) {
-    return counter_instruction;
-  } else if (counter_idx >= counter_move_total && counter_idx <= counter_move_const_any) {
-    return counter_move_total;
-  }
-  return invalid_counter;
+	if (counter_idx == counter_fpu_method || counter_idx == counter_loop_method || counter_idx == counter_exception_method) {
+		return counter_method;
+	} else if (counter_idx == counter_loop_block || counter_idx == counter_exception_block) {
+		return counter_block;
+	} else if (counter_idx >= counter_instruction && counter_idx <= counter_other_inst) {
+		return counter_instruction;
+	} else if (counter_idx >= counter_move_total && counter_idx <= counter_move_const_any) {
+		return counter_move_total;
+	}
+	return invalid_counter;
 }
 
 LinearScanStatistic::LinearScanStatistic() {
-  for (int i = 0; i < number_of_counters; i++) {
-    _counters_sum[i] = 0;
-    _counters_max[i] = -1;
-  }
+	for (int i = 0; i < number_of_counters; i++) {
+		_counters_sum[i] = 0;
+		_counters_max[i] = -1;
+	}
 
 }
 
 // add the method-local numbers to the total sum
 void LinearScanStatistic::sum_up(LinearScanStatistic &method_statistic) {
-  for (int i = 0; i < number_of_counters; i++) {
-    _counters_sum[i] += method_statistic._counters_sum[i];
-    _counters_max[i] = MAX2(_counters_max[i], method_statistic._counters_sum[i]);
-  }
+	for (int i = 0; i < number_of_counters; i++) {
+		_counters_sum[i] += method_statistic._counters_sum[i];
+		_counters_max[i] = MAX2(_counters_max[i], method_statistic._counters_sum[i]);
+	}
 }
 
 void LinearScanStatistic::print(const char* title) {
-  if (CountLinearScan || TraceLinearScanLevel > 0) {
-    tty->cr();
-    tty->print_cr("***** LinearScan statistic - %s *****", title);
-
-    for (int i = 0; i < number_of_counters; i++) {
-      if (_counters_sum[i] > 0 || _counters_max[i] >= 0) {
-        tty->print("%25s: %8d", counter_name(i), _counters_sum[i]);
-
-        if (base_counter(i) != invalid_counter) {
-          tty->print("  (%5.1f%%) ", _counters_sum[i] * 100.0 / _counters_sum[base_counter(i)]);
-        } else {
-          tty->print("           ");
-        }
-
-        if (_counters_max[i] >= 0) {
-          tty->print("%8d", _counters_max[i]);
-        }
-      }
-      tty->cr();
-    }
-  }
+	if (CountLinearScan || TraceLinearScanLevel > 0) {
+		tty->cr();
+		tty->print_cr("***** LinearScan statistic - %s *****", title);
+
+		for (int i = 0; i < number_of_counters; i++) {
+			if (_counters_sum[i] > 0 || _counters_max[i] >= 0) {
+				tty->print("%25s: %8d", counter_name(i), _counters_sum[i]);
+
+				if (base_counter(i) != invalid_counter) {
+					tty->print("  (%5.1f%%) ", _counters_sum[i] * 100.0 / _counters_sum[base_counter(i)]);
+				} else {
+					tty->print("           ");
+				}
+
+				if (_counters_max[i] >= 0) {
+					tty->print("%8d", _counters_max[i]);
+				}
+			}
+			tty->cr();
+		}
+	}
 }
 
 void LinearScanStatistic::collect(LinearScan* allocator) {
-  inc_counter(counter_method);
-  if (allocator->has_fpu_registers()) {
-    inc_counter(counter_fpu_method);
-  }
-  if (allocator->num_loops() > 0) {
-    inc_counter(counter_loop_method);
-  }
-  inc_counter(counter_loop, allocator->num_loops());
-  inc_counter(counter_spill_slots, allocator->max_spills());
-
-  int i;
-  for (i = 0; i < allocator->interval_count(); i++) {
-    Interval* cur = allocator->interval_at(i);
-
-    if (cur != NULL) {
-      inc_counter(counter_interval);
-      inc_counter(counter_use_pos, cur->num_use_positions());
-      if (LinearScan::is_precolored_interval(cur)) {
-        inc_counter(counter_fixed_interval);
-        inc_counter(counter_fixed_use_pos, cur->num_use_positions());
-      }
-
-      Range* range = cur->first();
-      while (range != Range::end()) {
-        inc_counter(counter_range);
-        if (LinearScan::is_precolored_interval(cur)) {
-          inc_counter(counter_fixed_range);
-        }
-        range = range->next();
-      }
-    }
-  }
-
-  bool has_xhandlers = false;
-  // Note: only count blocks that are in code-emit order
-  for (i = 0; i < allocator->ir()->code()->length(); i++) {
-    BlockBegin* cur = allocator->ir()->code()->at(i);
-
-    inc_counter(counter_block);
-    if (cur->loop_depth() > 0) {
-      inc_counter(counter_loop_block);
-    }
-    if (cur->is_set(BlockBegin::exception_entry_flag)) {
-      inc_counter(counter_exception_block);
-      has_xhandlers = true;
-    }
-
-    LIR_OpList* instructions = cur->lir()->instructions_list();
-    for (int j = 0; j < instructions->length(); j++) {
-      LIR_Op* op = instructions->at(j);
-
-      inc_counter(counter_instruction);
-
-      switch (op->code()) {
-        case lir_label:           inc_counter(counter_label); break;
-        case lir_std_entry:
-        case lir_osr_entry:       inc_counter(counter_entry); break;
-        case lir_return:          inc_counter(counter_return); break;
-
-        case lir_rtcall:
-        case lir_static_call:
-        case lir_optvirtual_call:
-        case lir_virtual_call:    inc_counter(counter_call); break;
-
-        case lir_move: {
-          inc_counter(counter_move);
-          inc_counter(counter_move_total);
-
-          LIR_Opr in = op->as_Op1()->in_opr();
-          LIR_Opr res = op->as_Op1()->result_opr();
-          if (in->is_register()) {
-            if (res->is_register()) {
-              inc_counter(counter_move_reg_reg);
-            } else if (res->is_stack()) {
-              inc_counter(counter_move_reg_stack);
-            } else if (res->is_address()) {
-              inc_counter(counter_move_reg_mem);
-            } else {
-              ShouldNotReachHere();
-            }
-          } else if (in->is_stack()) {
-            if (res->is_register()) {
-              inc_counter(counter_move_stack_reg);
-            } else {
-              inc_counter(counter_move_stack_stack);
-            }
-          } else if (in->is_address()) {
-            assert(res->is_register(), "must be");
-            inc_counter(counter_move_mem_reg);
-          } else if (in->is_constant()) {
-            inc_counter(counter_move_const_any);
-          } else {
-            ShouldNotReachHere();
-          }
-          break;
-        }
-
-        case lir_cmp:             inc_counter(counter_cmp); break;
-
-        case lir_branch:
-        case lir_cond_float_branch: {
-          LIR_OpBranch* branch = op->as_OpBranch();
-          if (branch->block() == NULL) {
-            inc_counter(counter_stub_branch);
-          } else if (branch->cond() == lir_cond_always) {
-            inc_counter(counter_uncond_branch);
-          } else {
-            inc_counter(counter_cond_branch);
-          }
-          break;
-        }
-
-        case lir_neg:
-        case lir_add:
-        case lir_sub:
-        case lir_mul:
-        case lir_mul_strictfp:
-        case lir_div:
-        case lir_div_strictfp:
-        case lir_rem:
-        case lir_sqrt:
-        case lir_sin:
-        case lir_cos:
-        case lir_abs:
-        case lir_log10:
-        case lir_log:
-        case lir_logic_and:
-        case lir_logic_or:
-        case lir_logic_xor:
-        case lir_shl:
-        case lir_shr:
-        case lir_ushr:            inc_counter(counter_alu); break;
-
-        case lir_alloc_object:
-        case lir_alloc_array:     inc_counter(counter_alloc); break;
-
-        case lir_monaddr:
-        case lir_lock:
-        case lir_unlock:          inc_counter(counter_sync); break;
-
-        case lir_throw:           inc_counter(counter_throw); break;
-
-        case lir_unwind:          inc_counter(counter_unwind); break;
-
-        case lir_null_check:
-        case lir_leal:
-        case lir_instanceof:
-        case lir_checkcast:
-        case lir_store_check:     inc_counter(counter_typecheck); break;
-
-        case lir_fpop_raw:
-        case lir_fxch:
-        case lir_fld:             inc_counter(counter_fpu_stack); break;
-
-        case lir_nop:
-        case lir_push:
-        case lir_pop:
-        case lir_convert:
-        case lir_roundfp:
-        case lir_cmove:           inc_counter(counter_misc_inst); break;
-
-        default:                  inc_counter(counter_other_inst); break;
-      }
-    }
-  }
-
-  if (has_xhandlers) {
-    inc_counter(counter_exception_method);
-  }
+	inc_counter(counter_method);
+	if (allocator->has_fpu_registers()) {
+		inc_counter(counter_fpu_method);
+	}
+	if (allocator->num_loops() > 0) {
+		inc_counter(counter_loop_method);
+	}
+	inc_counter(counter_loop, allocator->num_loops());
+	inc_counter(counter_spill_slots, allocator->max_spills());
+
+	int i;
+	for (i = 0; i < allocator->interval_count(); i++) {
+		Interval* cur = allocator->interval_at(i);
+
+		if (cur != NULL) {
+			inc_counter(counter_interval);
+			inc_counter(counter_use_pos, cur->num_use_positions());
+			if (LinearScan::is_precolored_interval(cur)) {
+				inc_counter(counter_fixed_interval);
+				inc_counter(counter_fixed_use_pos, cur->num_use_positions());
+			}
+
+			Range* range = cur->first();
+			while (range != Range::end()) {
+				inc_counter(counter_range);
+				if (LinearScan::is_precolored_interval(cur)) {
+					inc_counter(counter_fixed_range);
+				}
+				range = range->next();
+			}
+		}
+	}
+
+	bool has_xhandlers = false;
+	// Note: only count blocks that are in code-emit order
+	for (i = 0; i < allocator->ir()->code()->length(); i++) {
+		BlockBegin* cur = allocator->ir()->code()->at(i);
+
+		inc_counter(counter_block);
+		if (cur->loop_depth() > 0) {
+			inc_counter(counter_loop_block);
+		}
+		if (cur->is_set(BlockBegin::exception_entry_flag)) {
+			inc_counter(counter_exception_block);
+			has_xhandlers = true;
+		}
+
+		LIR_OpList* instructions = cur->lir()->instructions_list();
+		for (int j = 0; j < instructions->length(); j++) {
+			LIR_Op* op = instructions->at(j);
+
+			inc_counter(counter_instruction);
+
+			switch (op->code()) {
+				case lir_label:           inc_counter(counter_label); break;
+				case lir_std_entry:
+				case lir_osr_entry:       inc_counter(counter_entry); break;
+				case lir_return:          inc_counter(counter_return); break;
+
+				case lir_rtcall:
+				case lir_static_call:
+				case lir_optvirtual_call:
+				case lir_virtual_call:    inc_counter(counter_call); break;
+
+				case lir_move: {
+					inc_counter(counter_move);
+					inc_counter(counter_move_total);
+
+					LIR_Opr in = op->as_Op1()->in_opr();
+					LIR_Opr res = op->as_Op1()->result_opr();
+					if (in->is_register()) {
+						if (res->is_register()) {
+							inc_counter(counter_move_reg_reg);
+						} else if (res->is_stack()) {
+							inc_counter(counter_move_reg_stack);
+						} else if (res->is_address()) {
+							inc_counter(counter_move_reg_mem);
+						} else {
+							ShouldNotReachHere();
+						}
+					} else if (in->is_stack()) {
+						if (res->is_register()) {
+							inc_counter(counter_move_stack_reg);
+						} else {
+							inc_counter(counter_move_stack_stack);
+						}
+					} else if (in->is_address()) {
+						assert(res->is_register(), "must be");
+						inc_counter(counter_move_mem_reg);
+					} else if (in->is_constant()) {
+						inc_counter(counter_move_const_any);
+					} else {
+						ShouldNotReachHere();
+					}
+					break;
+				}
+
+				case lir_cmp:             inc_counter(counter_cmp); break;
+
+				case lir_branch:
+				case lir_cond_float_branch: {
+					LIR_OpBranch* branch = op->as_OpBranch();
+					if (branch->block() == NULL) {
+						inc_counter(counter_stub_branch);
+					} else if (branch->cond() == lir_cond_always) {
+						inc_counter(counter_uncond_branch);
+					} else {
+						inc_counter(counter_cond_branch);
+					}
+					break;
+				}
+
+				case lir_neg:
+				case lir_add:
+				case lir_sub:
+				case lir_mul:
+				case lir_mul_strictfp:
+				case lir_div:
+				case lir_div_strictfp:
+				case lir_rem:
+				case lir_sqrt:
+				case lir_sin:
+				case lir_cos:
+				case lir_abs:
+				case lir_log10:
+				case lir_log:
+				case lir_logic_and:
+				case lir_logic_or:
+				case lir_logic_xor:
+				case lir_shl:
+				case lir_shr:
+				case lir_ushr:            inc_counter(counter_alu); break;
+
+				case lir_alloc_object:
+				case lir_alloc_array:     inc_counter(counter_alloc); break;
+
+				case lir_monaddr:
+				case lir_lock:
+				case lir_unlock:          inc_counter(counter_sync); break;
+
+				case lir_throw:           inc_counter(counter_throw); break;
+
+				case lir_unwind:          inc_counter(counter_unwind); break;
+
+				case lir_null_check:
+				case lir_leal:
+				case lir_instanceof:
+				case lir_checkcast:
+				case lir_store_check:     inc_counter(counter_typecheck); break;
+
+				case lir_fpop_raw:
+				case lir_fxch:
+				case lir_fld:             inc_counter(counter_fpu_stack); break;
+
+				case lir_nop:
+				case lir_push:
+				case lir_pop:
+				case lir_convert:
+				case lir_roundfp:
+				case lir_cmove:           inc_counter(counter_misc_inst); break;
+
+				default:                  inc_counter(counter_other_inst); break;
+			}
+		}
+	}
+
+	if (has_xhandlers) {
+		inc_counter(counter_exception_method);
+	}
 }
 
 void LinearScanStatistic::compute(LinearScan* allocator, LinearScanStatistic &global_statistic) {
-  if (CountLinearScan || TraceLinearScanLevel > 0) {
-
-    LinearScanStatistic local_statistic = LinearScanStatistic();
-
-    local_statistic.collect(allocator);
-    global_statistic.sum_up(local_statistic);
-
-    if (TraceLinearScanLevel > 2) {
-      local_statistic.print("current local statistic");
-    }
-  }
+	if (CountLinearScan || TraceLinearScanLevel > 0) {
+
+		LinearScanStatistic local_statistic = LinearScanStatistic();
+
+		local_statistic.collect(allocator);
+		global_statistic.sum_up(local_statistic);
+
+		if (TraceLinearScanLevel > 2) {
+			local_statistic.print("current local statistic");
+		}
+	}
 }
 
 
 // Implementation of LinearTimers
 
 LinearScanTimers::LinearScanTimers() {
-  for (int i = 0; i < number_of_timers; i++) {
-    timer(i)->reset();
-  }
+	for (int i = 0; i < number_of_timers; i++) {
+		timer(i)->reset();
+	}
 }
 
 const char* LinearScanTimers::timer_name(int idx) {
-  switch (idx) {
-    case timer_do_nothing:               return "Nothing (Time Check)";
-    case timer_number_instructions:      return "Number Instructions";
-    case timer_compute_local_live_sets:  return "Local Live Sets";
-    case timer_compute_global_live_sets: return "Global Live Sets";
-    case timer_build_intervals:          return "Build Intervals";
-    case timer_sort_intervals_before:    return "Sort Intervals Before";
-    case timer_allocate_registers:       return "Allocate Registers";
-    case timer_resolve_data_flow:        return "Resolve Data Flow";
-    case timer_sort_intervals_after:     return "Sort Intervals After";
-    case timer_eliminate_spill_moves:    return "Spill optimization";
-    case timer_assign_reg_num:           return "Assign Reg Num";
-    case timer_allocate_fpu_stack:       return "Allocate FPU Stack";
-    case timer_optimize_lir:             return "Optimize LIR";
-    default: ShouldNotReachHere();       return "";
-  }
+	switch (idx) {
+		case timer_do_nothing:               return "Nothing (Time Check)";
+		case timer_number_instructions:      return "Number Instructions";
+		case timer_compute_local_live_sets:  return "Local Live Sets";
+		case timer_compute_global_live_sets: return "Global Live Sets";
+		case timer_build_intervals:          return "Build Intervals";
+		case timer_sort_intervals_before:    return "Sort Intervals Before";
+		case timer_allocate_registers:       return "Allocate Registers";
+		case timer_resolve_data_flow:        return "Resolve Data Flow";
+		case timer_sort_intervals_after:     return "Sort Intervals After";
+		case timer_eliminate_spill_moves:    return "Spill optimization";
+		case timer_assign_reg_num:           return "Assign Reg Num";
+		case timer_allocate_fpu_stack:       return "Allocate FPU Stack";
+		case timer_optimize_lir:             return "Optimize LIR";
+		default: ShouldNotReachHere();       return "";
+	}
 }
 
 void LinearScanTimers::begin_method() {
-  if (TimeEachLinearScan) {
-    // reset all timers to measure only current method
-    for (int i = 0; i < number_of_timers; i++) {
-      timer(i)->reset();
-    }
-  }
+	if (TimeEachLinearScan) {
+		// reset all timers to measure only current method
+		for (int i = 0; i < number_of_timers; i++) {
+			timer(i)->reset();
+		}
+	}
 }
 
 void LinearScanTimers::end_method(LinearScan* allocator) {
-  if (TimeEachLinearScan) {
-
-    double c = timer(timer_do_nothing)->seconds();
-    double total = 0;
-    for (int i = 1; i < number_of_timers; i++) {
-      total += timer(i)->seconds() - c;
-    }
-
-    if (total >= 0.0005) {
-      // print all information in one line for automatic processing
-      tty->print("@"); allocator->compilation()->method()->print_name();
-
-      tty->print("@ %d ", allocator->compilation()->method()->code_size());
-      tty->print("@ %d ", allocator->block_at(allocator->block_count() - 1)->last_lir_instruction_id() / 2);
-      tty->print("@ %d ", allocator->block_count());
-      tty->print("@ %d ", allocator->num_virtual_regs());
-      tty->print("@ %d ", allocator->interval_count());
-      tty->print("@ %d ", allocator->_num_calls);
-      tty->print("@ %d ", allocator->num_loops());
-
-      tty->print("@ %6.6f ", total);
-      for (int i = 1; i < number_of_timers; i++) {
-        tty->print("@ %4.1f ", ((timer(i)->seconds() - c) / total) * 100);
-      }
-      tty->cr();
-    }
-  }
+	if (TimeEachLinearScan) {
+
+		double c = timer(timer_do_nothing)->seconds();
+		double total = 0;
+		for (int i = 1; i < number_of_timers; i++) {
+			total += timer(i)->seconds() - c;
+		}
+
+		if (total >= 0.0005) {
+			// print all information in one line for automatic processing
+			tty->print("@"); allocator->compilation()->method()->print_name();
+
+			tty->print("@ %d ", allocator->compilation()->method()->code_size());
+			tty->print("@ %d ", allocator->block_at(allocator->block_count() - 1)->last_lir_instruction_id() / 2);
+			tty->print("@ %d ", allocator->block_count());
+			tty->print("@ %d ", allocator->num_virtual_regs());
+			tty->print("@ %d ", allocator->interval_count());
+			tty->print("@ %d ", allocator->_num_calls);
+			tty->print("@ %d ", allocator->num_loops());
+
+			tty->print("@ %6.6f ", total);
+			for (int i = 1; i < number_of_timers; i++) {
+				tty->print("@ %4.1f ", ((timer(i)->seconds() - c) / total) * 100);
+			}
+			tty->cr();
+		}
+	}
 }
 
 void LinearScanTimers::print(double total_time) {
-  if (TimeLinearScan) {
-    // correction value: sum of dummy-timer that only measures the time that
-    // is necesary to start and stop itself
-    double c = timer(timer_do_nothing)->seconds();
-
-    for (int i = 0; i < number_of_timers; i++) {
-      double t = timer(i)->seconds();
-      tty->print_cr("    %25s: %6.3f s (%4.1f%%)  corrected: %6.3f s (%4.1f%%)", timer_name(i), t, (t / total_time) * 100.0, t - c, (t - c) / (total_time - 2 * number_of_timers * c) * 100);
-    }
-  }
+	if (TimeLinearScan) {
+		// correction value: sum of dummy-timer that only measures the time that
+		// is necessary to start and stop itself
+		double c = timer(timer_do_nothing)->seconds();
+
+		for (int i = 0; i < number_of_timers; i++) {
+			double t = timer(i)->seconds();
+			tty->print_cr("    %25s: %6.3f s (%4.1f%%)  corrected: %6.3f s (%4.1f%%)", timer_name(i), t, (t / total_time) * 100.0, t - c, (t - c) / (total_time - 2 * number_of_timers * c) * 100);
+		}
+	}
 }
 
 #endif // #ifndef PRODUCT
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -304,6 +304,7 @@
   //       (This may have to change if this code changes!)
   assert(oop(klass)->is_klass(), "not a class");
   BasicType elt_type = typeArrayKlass::cast(klass)->element_type();
+	
   oop obj = oopFactory::new_typeArray(elt_type, length, CHECK);
   thread->set_vm_result(obj);
   // This is pretty rare but this runtime patch is stressful to deoptimization
@@ -743,7 +744,6 @@
   // Note also that in the presence of inlining it is not guaranteed
   // that caller_method() == caller_code->method()
 
-
   int bci = vfst.bci();
 
   Events::log("patch_code @ " INTPTR_FORMAT , caller_frame.pc());
@@ -787,6 +787,7 @@
       case Bytecodes::_getstatic:
         { klassOop klass = resolve_field_return_klass(caller_method, bci, CHECK);
           // Save a reference to the class that has to be checked for initialization
+         
           init_klass = KlassHandle(THREAD, klass);
           k = klass;
         }
@@ -829,7 +830,7 @@
         break;
       default: Unimplemented();
     }
-    // convert to handle
+    // convert to handle 
     load_klass = Handle(THREAD, k);
   } else {
     ShouldNotReachHere();
@@ -883,11 +884,13 @@
         //    ....             <-- call destination
 
         address stub_location = caller_frame.pc() + PatchingStub::patch_info_offset();
+
         unsigned char* byte_count = (unsigned char*) (stub_location - 1);
         unsigned char* byte_skip = (unsigned char*) (stub_location - 2);
         unsigned char* being_initialized_entry_offset = (unsigned char*) (stub_location - 3);
         address copy_buff = stub_location - *byte_skip - *byte_count;
         address being_initialized_entry = stub_location - *being_initialized_entry_offset;
+
         if (TracePatching) {
           tty->print_cr(" Patching %s at bci %d at address 0x%x  (%s)", Bytecodes::name(code), bci,
                         instr_pc, (stub_id == Runtime1::access_field_patching_id) ? "field" : "klass");
@@ -901,7 +904,6 @@
           assert(map != NULL, "null check");
           map->print();
           tty->cr();
-
           Disassembler::decode(copy_buff, copy_buff + *byte_count, tty);
         }
         // depending on the code below, do_patch says whether to copy the patch body back into the nmethod
@@ -966,6 +968,7 @@
         if (do_patch) {
           // replace instructions
           // first replace the tail, then the call
+	  
           for (int i = NativeCall::instruction_size; i < *byte_count; i++) {
             address ptr = copy_buff + i;
             int a_byte = (*ptr) & 0xFF;
@@ -986,7 +989,7 @@
             RelocIterator iter(nm, (address)instr_pc, (address)(instr_pc + 1));
             relocInfo::change_reloc_info_for_address(&iter, (address) instr_pc,
                                                      relocInfo::none, relocInfo::oop_type);
-#ifdef SPARC
+#if defined(SPARC)
             // Sparc takes two relocations for an oop so update the second one.
             address instr_pc2 = instr_pc + NativeMovConstReg::add_offset;
             RelocIterator iter2(nm, instr_pc2, instr_pc2 + 1);
@@ -1021,7 +1024,7 @@
   debug_only(NoHandleMark nhm;)
   {
     // Enter VM mode
-
+    
     ResetNoHandleMark rnhm;
     patch_code(thread, load_klass_patching_id);
   }
@@ -1051,6 +1054,7 @@
   {
     // Enter VM mode
 
+    printf("%s %d\n", __FILE__, __LINE__);
     ResetNoHandleMark rnhm;
     patch_code(thread, access_field_patching_id);
   }
--- a/hotspot/src/share/vm/c1/c1_globals.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/c1/c1_globals.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -81,7 +81,8 @@
   develop(bool, SelectivePhiFunctions, true,                                \
           "create phi functions at loop headers only when necessary")       \
                                                                             \
-  develop(bool, DoCEE, true,                                                \
+/*by_css*/									\
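+/* Assumption: NOT_MIPS32(x)/MIPS32_ONLY(x) expand to x or to nothing depending on the build target (like NOT_LP64/LP64_ONLY), so CEE stays enabled everywhere except the MIPS32 port. */ \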
+  develop(bool, DoCEE, NOT_MIPS32(true) MIPS32_ONLY(false),                 \
           "Do Conditional Expression Elimination to simplify CFG")          \
                                                                             \
   develop(bool, PrintCEE, false,                                            \
--- a/hotspot/src/share/vm/ci/ciEnv.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/ci/ciEnv.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -839,7 +840,6 @@
 
     assert(offsets->value(CodeOffsets::Deopt) != -1, "must have deopt entry");
     assert(offsets->value(CodeOffsets::Exceptions) != -1, "must have exception entry");
-
     nm =  nmethod::new_nmethod(method,
                                compile_id(),
                                entry_bci,
@@ -859,8 +859,13 @@
     // general stress testing
     if (nm != NULL && StressNonEntrant) {
       MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
-      NativeJump::patch_verified_entry(nm->entry_point(), nm->verified_entry_point(),
-                  SharedRuntime::get_handle_wrong_method_stub());
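+      // MIPS32 note (assumption): a far jump on MIPS is a multi-instruction sequence, so the port
+      // patches the verified entry through NativeGeneralJump rather than the single-instruction NativeJump.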
+#ifndef MIPS32
+      NativeJump::patch_verified_entry(nm->entry_point(), nm->verified_entry_point(),
+                  SharedRuntime::get_handle_wrong_method_stub());
+#else
+      NativeGeneralJump::patch_verified_entry(nm->entry_point(), nm->verified_entry_point(),
+                  SharedRuntime::get_handle_wrong_method_stub());
+#endif
     }
 
     if (nm == NULL) {
--- a/hotspot/src/share/vm/code/compiledIC.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/code/compiledIC.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -511,7 +512,11 @@
   }
 
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);   // creation also verifies the object
-  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
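+  // Assumption: on MIPS32 the jump at the end of the static-call stub is a NativeGeneralJump
+  // (multi-instruction), so it cannot be located or patched as a NativeJump.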
+#ifndef MIPS32
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+#else
+  NativeGeneralJump* jump          = nativeGeneralJump_at(method_holder->next_instruction_address());
+#endif
 
   assert(method_holder->data()    == 0           || method_holder->data()    == (intptr_t)callee(), "a) MT-unsafe modification of inline cache");
   assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, "b) MT-unsafe modification of inline cache");
@@ -574,7 +579,11 @@
   address stub = static_stub->addr();
   assert(stub!=NULL, "stub not found");
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);   // creation also verifies the object
-  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+#ifndef MIPS32
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+#else
+  NativeGeneralJump* jump          = nativeGeneralJump_at(method_holder->next_instruction_address());
+#endif
   method_holder->set_data(0);
   jump->set_jump_destination((address)-1);
 }
@@ -653,7 +662,11 @@
   address stub = find_stub();
   assert(stub != NULL, "no stub found for static call");
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);   // creation also verifies the object
-  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+#ifndef MIPS32
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+#else
+  NativeGeneralJump* jump          = nativeGeneralJump_at(method_holder->next_instruction_address());
+#endif
 
   // Verify state
   assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
--- a/hotspot/src/share/vm/code/nmethod.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -421,7 +421,8 @@
 }
 
 // %%% This variable is no longer used?
-int nmethod::_zombie_instruction_size = NativeJump::instruction_size;
+//int nmethod::_zombie_instruction_size = NativeJump::instruction_size;
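+// The macro pair selects the class at preprocessing time: the expression reduces to
+// NativeJump::instruction_size on non-MIPS32 builds and to NativeGeneralJump::instruction_size on
+// MIPS32 (assumption: NOT_MIPS32/MIPS32_ONLY follow the usual NOT_xxx/xxx_ONLY convention).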
+int nmethod::_zombie_instruction_size = NOT_MIPS32(NativeJump)MIPS32_ONLY(NativeGeneralJump)::instruction_size; //aoqi
 
 
 nmethod* nmethod::new_native_nmethod(methodHandle method,
@@ -495,70 +496,70 @@
 #endif // def HAVE_DTRACE_H
 
 nmethod* nmethod::new_nmethod(methodHandle method,
-  int compile_id,
-  int entry_bci,
-  CodeOffsets* offsets,
-  int orig_pc_offset,
-  DebugInformationRecorder* debug_info,
-  Dependencies* dependencies,
-  CodeBuffer* code_buffer, int frame_size,
-  OopMapSet* oop_maps,
-  ExceptionHandlerTable* handler_table,
-  ImplicitExceptionTable* nul_chk_table,
-  AbstractCompiler* compiler,
-  int comp_level
-)
-{
-  assert(debug_info->oop_recorder() == code_buffer->oop_recorder(), "shared OR");
-  // create nmethod
-  nmethod* nm = NULL;
-  { MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    int nmethod_size =
-      allocation_size(code_buffer, sizeof(nmethod))
-      + adjust_pcs_size(debug_info->pcs_size())
-      + round_to(dependencies->size_in_bytes() , oopSize)
-      + round_to(handler_table->size_in_bytes(), oopSize)
-      + round_to(nul_chk_table->size_in_bytes(), oopSize)
-      + round_to(debug_info->data_size()       , oopSize);
-    nm = new (nmethod_size)
-      nmethod(method(), nmethod_size, compile_id, entry_bci, offsets,
-              orig_pc_offset, debug_info, dependencies, code_buffer, frame_size,
-              oop_maps,
-              handler_table,
-              nul_chk_table,
-              compiler,
-              comp_level);
-    if (nm != NULL) {
-      // To make dependency checking during class loading fast, record
-      // the nmethod dependencies in the classes it is dependent on.
-      // This allows the dependency checking code to simply walk the
-      // class hierarchy above the loaded class, checking only nmethods
-      // which are dependent on those classes.  The slow way is to
-      // check every nmethod for dependencies which makes it linear in
-      // the number of methods compiled.  For applications with a lot
-      // classes the slow way is too slow.
-      for (Dependencies::DepStream deps(nm); deps.next(); ) {
-        klassOop klass = deps.context_type();
-        if (klass == NULL)  continue;  // ignore things like evol_method
+		int compile_id,
+		int entry_bci,
+		CodeOffsets* offsets,
+		int orig_pc_offset,
+		DebugInformationRecorder* debug_info,
+		Dependencies* dependencies,
+		CodeBuffer* code_buffer, int frame_size,
+		OopMapSet* oop_maps,
+		ExceptionHandlerTable* handler_table,
+		ImplicitExceptionTable* nul_chk_table,
+		AbstractCompiler* compiler,
+		int comp_level
+		) {
+	assert(debug_info->oop_recorder() == code_buffer->oop_recorder(), "shared OR");
+	// create nmethod
+	nmethod* nm = NULL;
+	{ 
+		MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+		int nmethod_size =
+			allocation_size(code_buffer, sizeof(nmethod))
+			+ adjust_pcs_size(debug_info->pcs_size())
+			+ round_to(dependencies->size_in_bytes() , oopSize)
+			+ round_to(handler_table->size_in_bytes(), oopSize)
+			+ round_to(nul_chk_table->size_in_bytes(), oopSize)
+			+ round_to(debug_info->data_size()       , oopSize);
+		nm = new (nmethod_size)
+			nmethod(method(), nmethod_size, compile_id, entry_bci, offsets,
+					orig_pc_offset, debug_info, dependencies, code_buffer, frame_size,
+					oop_maps,
+					handler_table,
+					nul_chk_table,
+					compiler,
+					comp_level);
+		if (nm != NULL) {
+			// To make dependency checking during class loading fast, record
+			// the nmethod dependencies in the classes it is dependent on.
+			// This allows the dependency checking code to simply walk the
+			// class hierarchy above the loaded class, checking only nmethods
+			// which are dependent on those classes.  The slow way is to
+			// check every nmethod for dependencies which makes it linear in
+			// the number of methods compiled.  For applications with a lot
+			// classes the slow way is too slow.
+			for (Dependencies::DepStream deps(nm); deps.next(); ) {
+				klassOop klass = deps.context_type();
+				if (klass == NULL)  continue;  // ignore things like evol_method
 
-        // record this nmethod as dependent on this klass
-        instanceKlass::cast(klass)->add_dependent_nmethod(nm);
-      }
-    }
-    NOT_PRODUCT(if (nm != NULL)  nmethod_stats.note_nmethod(nm));
-    if (PrintAssembly && nm != NULL)
-      Disassembler::decode(nm);
-  }
+				// record this nmethod as dependent on this klass
+				instanceKlass::cast(klass)->add_dependent_nmethod(nm);
+			}
+		}
+		NOT_PRODUCT(if (nm != NULL)  nmethod_stats.note_nmethod(nm));
+		if (PrintAssembly && nm != NULL)
+			Disassembler::decode(nm);
+	}
 
-  // verify nmethod
-  debug_only(if (nm) nm->verify();) // might block
+	// verify nmethod
+	debug_only(if (nm) nm->verify();) // might block
 
-  if (nm != NULL) {
-    nm->log_new_nmethod();
-  }
+	if (nm != NULL) {
+		nm->log_new_nmethod();
+	}
 
-  // done
-  return nm;
+	// done
+	return nm;
 }
 
 
@@ -995,7 +996,8 @@
   // not-entrant methods.
   address low_boundary = verified_entry_point();
   if (!is_in_use()) {
-    low_boundary += NativeJump::instruction_size;
+    //low_boundary += NativeJump::instruction_size;
+    low_boundary += NOT_MIPS32(NativeJump)MIPS32_ONLY(NativeGeneralJump)::instruction_size;//aoqi
     // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
     // This means that the low_boundary is going to be a little too high.
     // This shouldn't matter, since oops of non-entrant methods are never used.
@@ -1173,9 +1175,12 @@
     // The caller can be calling the method statically or through an inline
     // cache call.
     if (!is_not_entrant()) {
-      NativeJump::patch_verified_entry(entry_point(), verified_entry_point(),
+      //NativeJump::patch_verified_entry(entry_point(), verified_entry_point(),
+      NOT_MIPS32(NativeJump)MIPS32_ONLY(NativeGeneralJump)
+          ::patch_verified_entry(entry_point(), verified_entry_point(),
                   SharedRuntime::get_handle_wrong_method_stub());
-      assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, "");
+      //assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, "");
+      assert (NOT_MIPS32(NativeJump)MIPS32_ONLY(NativeGeneralJump)::instruction_size == nmethod::_zombie_instruction_size, ""); //aoqi
     }
 
     // When the nmethod becomes zombie it is no longer alive so the
@@ -1414,7 +1419,8 @@
   // not-entrant methods.
   address low_boundary = verified_entry_point();
   if (is_not_entrant()) {
-    low_boundary += NativeJump::instruction_size;
+    //low_boundary += NativeJump::instruction_size;
+    low_boundary += NOT_MIPS32(NativeJump)MIPS32_ONLY(NativeGeneralJump)::instruction_size;//aoqi
     // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
     // (See comment above.)
   }
@@ -1531,7 +1537,8 @@
   // not-entrant methods.
   address low_boundary = verified_entry_point();
   if (is_not_entrant()) {
-    low_boundary += NativeJump::instruction_size;
+    //low_boundary += NativeJump::instruction_size;
+    low_boundary += NOT_MIPS32(NativeJump)MIPS32_ONLY(NativeGeneralJump)::instruction_size;//aoqi
     // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
     // (See comment above.)
   }
@@ -1887,7 +1894,9 @@
     return;
 
   // Make sure all the entry points are correctly aligned for patching.
-  NativeJump::check_verified_entry_alignment(entry_point(), verified_entry_point());
+  //NativeJump::check_verified_entry_alignment(entry_point(), verified_entry_point());
+  NOT_MIPS32(NativeJump)MIPS32_ONLY(NativeGeneralJump)
+      ::check_verified_entry_alignment(entry_point(), verified_entry_point());
 
   assert(method()->is_oop(), "must be valid");
 
@@ -1944,6 +1953,7 @@
   // iterate through all interrupt point
   // and verify the debug information is valid.
   RelocIterator iter((nmethod*)this);
+
   while (iter.next()) {
     address stub = NULL;
     switch (iter.type()) {
--- a/hotspot/src/share/vm/code/relocInfo.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/code/relocInfo.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -749,14 +750,21 @@
 
 //// miscellaneous methods
 oop* oop_Relocation::oop_addr() {
-  int n = _oop_index;
+#ifndef CORE
+	int n = _oop_index;
   if (n == 0) {
+#ifdef MIPS32
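+	// Assumption: the MIPS32 port does not keep whole oops in the instruction stream (an embedded
+	// constant is split across instruction immediates), so a zero oop index is not expected here.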
+	ShouldNotReachHere();
+#endif
     // oop is stored in the code stream
     return (oop*) pd_address_in_code();
   } else {
     // oop is stored in table at CodeBlob::oops_begin
     return code()->oop_addr_at(n);
   }
+#else
+  return NULL;
+#endif // !CORE
 }
 
 
@@ -778,6 +786,7 @@
 
 RelocIterator virtual_call_Relocation::parse_ic(CodeBlob* &code, address &ic_call, address &first_oop,
                                                 oop* &oop_addr, bool *is_optimized) {
+
   assert(ic_call != NULL, "ic_call address must be set");
   assert(ic_call != NULL || first_oop != NULL, "must supply a non-null input");
   if (code == NULL) {
--- a/hotspot/src/share/vm/code/relocInfo.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/code/relocInfo.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -261,8 +262,12 @@
     poll_type               = 10, // polling instruction for safepoints
     poll_return_type        = 11, // polling instruction for safepoints at return
     breakpoint_type         = 12, // an initialization barrier or safepoint
-    yet_unused_type         = 13, // Still unused
-    yet_unused_type_2       = 14, // Still unused
+#ifndef MIPS32
+    yet_unused_type         = 13, // Still unused
+#else
+    internal_pc_type        = 13, // tag for internal data, by yjl 9/19/2005
+#endif
+    yet_unused_type_2       = 14, // Still unused
     data_prefix_tag         = 15, // tag for a prefix (carries data arguments)
     type_mask               = 15  // A mask which selects only the above values
   };
@@ -302,6 +307,7 @@
     visitor(poll_return) \
     visitor(breakpoint) \
     visitor(section_word) \
+    MIPS32_ONLY(visitor(internal_pc)) \
 
 
  public:
@@ -743,6 +749,7 @@
       x0 = relocInfo::jint_data_at(0, dp, dlen);
       x1 = relocInfo::jint_data_at(2, dp, dlen);
     }
+
   }
 
  protected:
@@ -941,6 +948,17 @@
   // Note:  oop_value transparently converts Universe::non_oop_word to NULL.
 };
 
+#ifdef MIPS32
+// to handle the set_last_java_frame pc
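+// Assumption: the pc saved by set_last_java_frame is embedded in generated code, so the port gives
+// it its own relocation type and adjusts it in fix_relocation_after_move() when a code buffer is
+// copied to its final location.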
+class internal_pc_Relocation : public Relocation {
+  relocInfo::relocType type() { return relocInfo::internal_pc_type; }
+ public:
+  address pc() { return pd_get_address_from_code(); }
+  //void     fix_relocation_at_move(intptr_t delta);
+  void     fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest);
+};
+#endif
+
 class virtual_call_Relocation : public CallRelocation {
   relocInfo::relocType type() { return relocInfo::virtual_call_type; }
 
@@ -964,7 +982,8 @@
   address _oop_limit;               // search limit for set-oop instructions
 
   friend class RelocIterator;
-  virtual_call_Relocation() { }
+  virtual_call_Relocation() {
+  }
 
 
  public:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/compiler/cha.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,69 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)cha.hpp	1.17 05/11/18 15:21:37 JVM"
+#endif
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+// Class Hierarchy Analysis 
+// Computes the set of overriding methods for a particular call,
+// using the subclass links in instanceKlass.
+// Right now the CHA just traverses these links for every query;
+// if this should become too slow we can put in a cache.
+
+// result of a CHA query
+class CHAResult : public ResourceObj {
+  friend class CHA;
+  const KlassHandle  _receiver;                                 // copies of the lookup (for better debugging)
+  const symbolHandle _name;
+  const symbolHandle _signature;
+  const methodHandle _target;                                   // target method (if final)
+  const bool         _valid;
+  const GrowableArray<methodHandle>* const _target_methods;     // list of possible targets (NULL for final methods or if !UseCHA)
+  const GrowableArray<KlassHandle>* const  _receivers;          // list of possible receiver klasses (NULL for final methods or if !UseCHA)
+
+  CHAResult(KlassHandle receiver, symbolHandle name, symbolHandle signature,
+            GrowableArray<KlassHandle>* receivers, GrowableArray<methodHandle>* methods, 
+            methodHandle target, bool valid = true);
+ public:
+  KlassHandle  receiver() const                               { return _receiver; }
+  symbolHandle name() const                                   { return _name; }
+  symbolHandle signature() const                              { return _signature; }
+  bool      is_accurate() const                               { return !_target_methods->is_full(); }
+  bool      is_monomorphic() const;
+  methodHandle monomorphic_target() const;                    // returns the single target (if is_monomorphic)
+  KlassHandle  monomorphic_receiver() const;                  // receiver klass of monomorphic_target
+  const GrowableArray<KlassHandle>*  receivers() const        { return _receivers; }
+    // Returns the list of all subclasses that are possible receivers (empty array if none, capped at max_result).
+    // The static receiver klass *is* included in the result (unless it is abstract).
+    // The list is a class hierarchy preorder, i.e., subclasses precede their superclass.
+    // All possible receiver classes are included, not just those that (re)define the method.
+    // Abstract classes are suppressed.
+  const GrowableArray<methodHandle>* target_methods() const   { return _target_methods; }
+    // Returns the list of possible target methods, i.e., all methods potentially invoked
+    // by this send (empty array if none, capped at max_result).
+    // If the receiver klass (or one of its superclasses) defines the method, this definition 
+    // is included in the result.  Abstract methods are suppressed.
+  void print();
+};
+
+
+class CHA : AllStatic {
+  static int _max_result;           // maximum result size (for efficiency)
+  static bool _used;                // has CHA been used yet?  (will go away when deoptimization implemented)
+
+  static void process_class(KlassHandle r, GrowableArray<KlassHandle>* receivers, GrowableArray<methodHandle>* methods, 
+                            symbolHandle name, symbolHandle signature);
+  static void process_interface(instanceKlassHandle r, GrowableArray<KlassHandle>* receivers, GrowableArray<methodHandle>* methods, 
+                            symbolHandle name, symbolHandle signature);
+ public:
+  static bool has_been_used()       { return _used; }
+  static int  max_result()          { return _max_result; }
+  static void set_max_result(int n) { _max_result = n; }
+
+  static CHAResult* analyze_call(KlassHandle calling_klass, KlassHandle static_receiver, 
+                                 KlassHandle actual_receiver, symbolHandle name, symbolHandle signature);
+};
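+
+// For illustration, a hedged sketch of how a compiler client might consult
+// CHA (the handles below are assumed to come from the caller's context):
+//
+//   CHAResult* result = CHA::analyze_call(calling_klass, static_receiver,
+//                                         actual_receiver, name, signature);
+//   if (result->is_monomorphic()) {
+//     methodHandle target = result->monomorphic_target();   // safe to devirtualize
+//   } else {
+//     const GrowableArray<methodHandle>* targets = result->target_methods();
+//     // several possible targets: keep the virtual dispatch
+//   }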
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/compiler/disassemblerEnv.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,20 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)disassemblerEnv.hpp	1.14 05/11/18 15:21:38 JVM"
+#endif
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+// Call-back interface for external disassembler
+class DisassemblerEnv {
+ public:
+  // printing
+  virtual void print_label(intptr_t value)   = 0;
+  virtual void print_raw(char* str)     = 0;
+  virtual void print(char* format, ...) = 0;
+  // helpers
+  virtual char* string_for_offset(intptr_t value) = 0;
+  virtual char* string_for_constant(unsigned char* pc, intptr_t value, int is_decimal) = 0;
+};
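+
+// A hedged sketch of a concrete environment a disassembler client might
+// supply (the class name is hypothetical; tty is HotSpot's global output
+// stream, assumed to be available here):
+//
+//   class TtyDisassemblerEnv : public DisassemblerEnv {
+//    public:
+//     void  print_label(intptr_t value)       { tty->print(INTPTR_FORMAT, value); }
+//     void  print_raw(char* str)              { tty->print_raw(str); }
+//     void  print(char* format, ...)          { /* format into a buffer, then print_raw it */ }
+//     char* string_for_offset(intptr_t value) { return NULL; }
+//     char* string_for_constant(unsigned char* pc, intptr_t value, int is_decimal) { return NULL; }
+//   };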
+
--- a/hotspot/src/share/vm/includeDB_compiler1	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/includeDB_compiler1	Thu Sep 30 13:48:16 2010 +0800
@@ -335,7 +335,7 @@
 c1_Runtime1.cpp                         compilationPolicy.hpp
 c1_Runtime1.cpp                         compiledIC.hpp
 c1_Runtime1.cpp                         copy.hpp
-c1_Runtime1.cpp                         disassembler.hpp
+c1_Runtime1.cpp                         disassembler_<arch>.hpp
 c1_Runtime1.cpp                         events.hpp
 c1_Runtime1.cpp                         interfaceSupport.hpp
 c1_Runtime1.cpp                         interpreter.hpp
--- a/hotspot/src/share/vm/includeDB_core	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/includeDB_core	Thu Sep 30 13:48:16 2010 +0800
@@ -245,7 +245,7 @@
 
 assembler.inline.hpp                    assembler.hpp
 assembler.inline.hpp                    codeBuffer.hpp
-assembler.inline.hpp                    disassembler.hpp
+assembler.inline.hpp                    disassembler_<arch>.hpp
 assembler.inline.hpp                    threadLocalStorage.hpp
 
 assembler_<arch>.cpp              assembler_<arch>.inline.hpp
@@ -955,7 +955,7 @@
 codeBlob.cpp                            bytecode.hpp
 codeBlob.cpp                            codeBlob.hpp
 codeBlob.cpp                            codeCache.hpp
-codeBlob.cpp                            disassembler.hpp
+codeBlob.cpp                            disassembler_<arch>.hpp
 codeBlob.cpp                            forte.hpp
 codeBlob.cpp                            handles.inline.hpp
 codeBlob.cpp                            heap.hpp
@@ -977,7 +977,7 @@
 
 codeBuffer.cpp                          codeBuffer.hpp
 codeBuffer.cpp                          copy.hpp
-codeBuffer.cpp                          disassembler.hpp
+codeBuffer.cpp                          disassembler_<arch>.hpp
 
 codeBuffer.hpp                          assembler.hpp
 codeBuffer.hpp                          oopRecorder.hpp
@@ -1336,7 +1336,7 @@
 debug.cpp                               collectedHeap.hpp
 debug.cpp                               compileBroker.hpp
 debug.cpp                               defaultStream.hpp
-debug.cpp                               disassembler.hpp
+debug.cpp                               disassembler_<arch>.hpp
 debug.cpp                               events.hpp
 debug.cpp                               frame.hpp
 debug.cpp                               heapDumper.hpp
@@ -1455,7 +1455,7 @@
 deoptimization.hpp                      frame.inline.hpp
 
 depChecker_<arch>.cpp                   depChecker_<arch>.hpp
-depChecker_<arch>.cpp                   disassembler.hpp
+depChecker_<arch>.cpp                   disassembler_<arch>.hpp
 depChecker_<arch>.cpp                   hpi.hpp
 
 dependencies.cpp                        ciArrayKlass.hpp
@@ -1485,21 +1485,23 @@
 dictionary.hpp                          oop.hpp
 dictionary.hpp                          systemDictionary.hpp
 
-disassembler_<arch>.hpp                 generate_platform_dependent_include
-
-disassembler.cpp                        cardTableModRefBS.hpp
-disassembler.cpp                        codeCache.hpp
-disassembler.cpp                        collectedHeap.hpp
-disassembler.cpp                        depChecker_<arch>.hpp
-disassembler.cpp                        disassembler.hpp
-disassembler.cpp                        fprofiler.hpp
-disassembler.cpp                        handles.inline.hpp
-disassembler.cpp                        hpi.hpp
-disassembler.cpp                        stubCodeGenerator.hpp
-disassembler.cpp                        stubRoutines.hpp
-
-disassembler.hpp                        globals.hpp
-disassembler.hpp                        os_<os_family>.inline.hpp
+//disassembler_<arch>.hpp                 generate_platform_dependent_include
+
+disassembler_<arch>.cpp                 cardTableModRefBS.hpp
+disassembler_<arch>.cpp                 codeCache.hpp
+disassembler_<arch>.cpp                 collectedHeap.hpp
+disassembler_<arch>.cpp                 depChecker_<arch>.hpp
+disassembler_<arch>.cpp                 disassembler_<arch>.hpp
+disassembler_<arch>.cpp                 fprofiler.hpp
+disassembler_<arch>.cpp                 handles.inline.hpp
+disassembler_<arch>.cpp                 hpi.hpp
+disassembler_<arch>.cpp                 stubCodeGenerator.hpp
+disassembler_<arch>.cpp                 stubRoutines.hpp
+
+disassembler_<arch>.hpp                 disassemblerEnv.hpp
+disassembler_<arch>.hpp                 os_<os_family>.inline.hpp
+
+disassemblerEnv.hpp                     globals.hpp
 
 dtraceAttacher.cpp                      codeCache.hpp
 dtraceAttacher.cpp                      deoptimization.hpp
@@ -2958,13 +2960,15 @@
 nativeLookup.hpp                        handles.hpp
 nativeLookup.hpp                        top.hpp
 
+disassembler_<arch>.cpp                 nmethod.hpp
+
 nmethod.cpp                             abstractCompiler.hpp
 nmethod.cpp                             bytecode.hpp
 nmethod.cpp                             codeCache.hpp
 nmethod.cpp                             compileLog.hpp
 nmethod.cpp                             compiledIC.hpp
 nmethod.cpp                             compilerOracle.hpp
-nmethod.cpp                             disassembler.hpp
+nmethod.cpp                             disassembler_<arch>.hpp
 nmethod.cpp                             dtrace.hpp
 nmethod.cpp                             events.hpp
 nmethod.cpp                             jvmtiRedefineClassesTrace.hpp
@@ -3844,7 +3848,7 @@
 statSampler.hpp                         task.hpp
 
 stubCodeGenerator.cpp                   assembler_<arch>.inline.hpp
-stubCodeGenerator.cpp                   disassembler.hpp
+stubCodeGenerator.cpp                   disassembler_<arch>.hpp
 stubCodeGenerator.cpp                   forte.hpp
 stubCodeGenerator.cpp                   oop.inline.hpp
 stubCodeGenerator.cpp                   stubCodeGenerator.hpp
@@ -4612,7 +4616,7 @@
 vmreg_<arch>.hpp                        generate_platform_dependent_include
 
 vtableStubs.cpp                         allocation.inline.hpp
-vtableStubs.cpp                         disassembler.hpp
+vtableStubs.cpp                         disassembler_<arch>.hpp
 vtableStubs.cpp                         forte.hpp
 vtableStubs.cpp                         handles.inline.hpp
 vtableStubs.cpp                         instanceKlass.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/interpreter/cInterpretMethod.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,2645 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)cInterpretMethod.hpp	1.62 06/03/27 15:36:43 JVM"
+#endif
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+#ifndef CINTERPRETERBODY_ONCE
+#define CINTERPRETERBODY_ONCE
+#endif
+
+/*
+ * This code was converted from CVM sources to C++ for the HotSpot VM
+ */
+
+#ifdef CC_INTERP
+
+/*
+ * USELABELS - If using GCC, then use labels for the opcode dispatching
+ * rather than a switch statement. This improves performance because it
+ * gives us the opportunity to have the instructions that compute the next
+ * opcode to jump to intermixed with the rest of the instructions that
+ * implement the opcode (see the UPDATE_PC_AND_TOS_AND_CONTINUE macro).
+ * A small illustrative sketch follows the CASE macros below.
+ */
+#undef USELABELS
+#ifdef __GNUC__
+/* 
+   ASSERT signifies debugging. It is much easier to step through bytecodes if we
+   don't use the computed goto approach.
+*/
+#ifndef ASSERT
+#define USELABELS
+#endif
+#endif
+
+#undef CASE
+#ifdef USELABELS
+#define CASE(opcode) opc ## opcode
+#define DEFAULT opc_default
+#else
+#define CASE(opcode) case Bytecodes:: opcode
+#define DEFAULT default
+#endif
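+
+/*
+ * For illustration, a minimal standalone sketch (names here are hypothetical,
+ * not part of this file) of the labels-as-values dispatch that USELABELS
+ * selects under GCC:
+ *
+ *   static int tiny_run(const unsigned char* code) {
+ *     static const void* table[] = { &&op_nop, &&op_inc, &&op_halt };
+ *     int acc = 0;
+ *     const unsigned char* ip = code;
+ *     goto *table[*ip];                       // jump straight to the first handler
+ *   op_nop:  ++ip;        goto *table[*ip];   // each handler dispatches the next opcode itself
+ *   op_inc:  ++acc; ++ip; goto *table[*ip];
+ *   op_halt: return acc;
+ *   }
+ *
+ * Without USELABELS the same dispatch is a plain switch at the top of a while loop.
+ */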
+
+/*
+ * PREFETCH_OPCCODE - Some compilers do better if you prefetch the next 
+ * opcode before going back to the top of the while loop, rather than having
+ * the top of the while loop handle it. This provides a better opportunity
+ * for instruction scheduling. Some compilers just do this prefetch
+ * automatically. Some actually end up with worse performance if you
+ * force the prefetch. Solaris gcc seems to do better, but cc does worse.
+ */
+#undef PREFETCH_OPCCODE
+#define PREFETCH_OPCCODE
+
+/*
+  Interpreter safepoint: it is expected that the interpreter will have no
+  handles of its own creation live at an interpreter safepoint. Therefore we
+  run a HandleMarkCleaner and trash all handles allocated in the call chain
+  since the JavaCalls::call_helper invocation that initiated the chain.
+  There really shouldn't be any handles remaining to trash but this is cheap
+  in relation to a safepoint.
+*/
+#define SAFEPOINT                                                                 \
+    if ( SafepointSynchronize::is_synchronizing()) {                              \
+        {                                                                         \
+          /* zap freed handles rather than GC'ing them */                         \
+          HandleMarkCleaner __hmc(THREAD);                                        \
+        }                                                                         \
+        CALL_VM(SafepointSynchronize::block(THREAD), handle_exception);           \
+    }
+
+/*
+ * VM_JAVA_ERROR - Macro for throwing a java exception from
+ * the interpreter loop. Should really be a CALL_VM but there
+ * is no entry point to do the transition to vm so we just
+ * do it by hand here.
+ */
+#define VM_JAVA_ERROR_NO_JUMP(name, msg)                                                  \
+    DECACHE_STATE();                                                              \
+    SET_LAST_JAVA_FRAME();                                                        \
+    {                                                                             \
+       ThreadInVMfromJava trans(THREAD);                                          \
+       Exceptions::_throw_msg(THREAD, __FILE__, __LINE__, name, msg, NULL, NULL); \
+    }                                                                             \
+    RESET_LAST_JAVA_FRAME();                                                      \
+    CACHE_STATE();                                                                
+
+// Normal throw of a java error
+#define VM_JAVA_ERROR(name, msg)                                                  \
+    VM_JAVA_ERROR_NO_JUMP(name, msg)                                              \
+    goto handle_exception;
+
+#ifdef PRODUCT
+#define DO_UPDATE_INSTRUCTION_COUNT(opcode)
+#else
+#define DO_UPDATE_INSTRUCTION_COUNT(opcode)                                                          \
+{                                                                                                    \
+    BytecodeCounter::_counter_value++;                                                               \
+    BytecodeHistogram::_counters[(Bytecodes::Code)opcode]++;                                         \
+    if (StopInterpreterAt && StopInterpreterAt == BytecodeCounter::_counter_value) os::breakpoint(); \
+    if (TraceBytecodes) {                                                                            \
+      CALL_VM((void)SharedRuntime::trace_bytecode(THREAD, 0,               \
+                                   topOfStack[Interpreter::expr_index_at(1)],   \
+                                   topOfStack[Interpreter::expr_index_at(2)]),  \
+                                   handle_exception);                      \
+    }                                                                      \
+}
+#endif
+
+#undef DEBUGGER_SINGLE_STEP_NOTIFY
+#ifdef VM_JVMTI
+/* NOTE: (kbr) This macro must be called AFTER the PC has been
+   incremented. JvmtiExport::at_single_stepping_point() may cause a
+   breakpoint opcode to get inserted at the current PC to allow the
+   debugger to coalesce single-step events.
+   
+   As a result if we call at_single_stepping_point() we refetch opcode
+   to get the current opcode. This will override any other prefetching
+   that might have occurred.
+*/
+#define DEBUGGER_SINGLE_STEP_NOTIFY()                                            \
+{                                                                                \
+      if (_jvmti_interp_events) {                                                \
+        if (JvmtiExport::should_post_single_step()) {                            \
+          DECACHE_STATE();                                                       \
+          SET_LAST_JAVA_FRAME();                                                 \
+          ThreadInVMfromJava trans(THREAD);                                      \
+          JvmtiExport::at_single_stepping_point(THREAD,                          \
+                                          istate->method(),                      \
+                                          pc);                                   \
+          RESET_LAST_JAVA_FRAME();                                               \
+          CACHE_STATE();                                                         \
+          if (THREAD->pop_frame_pending() &&                                     \
+              !THREAD->pop_frame_in_process()) {                                 \
+            goto handle_Pop_Frame;                                               \
+          }                                                                      \
+          opcode = *pc;                                                          \
+        }                                                                        \
+      }                                                                          \
+}
+#else
+#define DEBUGGER_SINGLE_STEP_NOTIFY() 
+#endif
+
+/*
+ * CONTINUE - Macro for executing the next opcode.
+ */
+#undef CONTINUE
+#ifdef USELABELS
+// Have to do the dispatch this way in C++ because otherwise gcc complains about crossing an
+// initialization (which is the initialization of the table pointer...)
+#define DISPATCH(opcode) goto *dispatch_table[opcode]
+#define CONTINUE {                              \
+        opcode = *pc;                           \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);    \
+        DEBUGGER_SINGLE_STEP_NOTIFY();          \
+        DISPATCH(opcode);                       \
+    }
+#else
+#ifdef PREFETCH_OPCCODE
+#define CONTINUE {                              \
+        opcode = *pc;                           \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);    \
+        DEBUGGER_SINGLE_STEP_NOTIFY();          \
+        continue;                               \
+    }
+#else
+#define CONTINUE {                              \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);    \
+        DEBUGGER_SINGLE_STEP_NOTIFY();          \
+        continue;                               \
+    }
+#endif
+#endif
+
+// JavaStack Implementation
+#define MORE_STACK(count)  \
+    (topOfStack -= ((count) * Interpreter::stackElementWords()))
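+// Note: the expression stack grows toward lower addresses, so "growing" it by
+// count slots subtracts from topOfStack.  For example, on a 32-bit target with
+// one-word stack elements, a push (MORE_STACK(1)) moves topOfStack from 0x1000
+// down to 0x0ffc, and the matching pop (MORE_STACK(-1)) moves it back up.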
+
+
+#define UPDATE_PC(opsize) {pc += opsize; }
+/*
+ * UPDATE_PC_AND_TOS - Macro for updating the pc and topOfStack.
+ */
+#undef UPDATE_PC_AND_TOS
+#define UPDATE_PC_AND_TOS(opsize, stack) \
+    {pc += opsize; MORE_STACK(stack); }
+
+/*
+ * UPDATE_PC_AND_TOS_AND_CONTINUE - Macro for updating the pc and topOfStack,
+ * and executing the next opcode. It's somewhat similar to the combination
+ * of UPDATE_PC_AND_TOS and CONTINUE, but with some minor optimizations.
+ */
+#undef UPDATE_PC_AND_TOS_AND_CONTINUE
+#ifdef USELABELS
+#define UPDATE_PC_AND_TOS_AND_CONTINUE(opsize, stack) {         \
+        pc += opsize; opcode = *pc; MORE_STACK(stack);          \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);                    \
+        DEBUGGER_SINGLE_STEP_NOTIFY();                          \
+        DISPATCH(opcode);                                       \
+    }
+
+#define UPDATE_PC_AND_CONTINUE(opsize) {                        \
+        pc += opsize; opcode = *pc;                             \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);                    \
+        DEBUGGER_SINGLE_STEP_NOTIFY();                          \
+        DISPATCH(opcode);                                       \
+    }
+#else
+#ifdef PREFETCH_OPCCODE
+#define UPDATE_PC_AND_TOS_AND_CONTINUE(opsize, stack) {         \
+        pc += opsize; opcode = *pc; MORE_STACK(stack);          \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);                    \
+        DEBUGGER_SINGLE_STEP_NOTIFY();                          \
+        goto do_continue;                                       \
+    }
+
+#define UPDATE_PC_AND_CONTINUE(opsize) {                        \
+        pc += opsize; opcode = *pc;                             \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);                    \
+        DEBUGGER_SINGLE_STEP_NOTIFY();                          \
+        goto do_continue;                                       \
+    }
+#else
+#define UPDATE_PC_AND_TOS_AND_CONTINUE(opsize, stack) { \
+        pc += opsize; MORE_STACK(stack);                \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);            \
+        DEBUGGER_SINGLE_STEP_NOTIFY();                  \
+        goto do_continue;                               \
+    }
+
+#define UPDATE_PC_AND_CONTINUE(opsize) {                \
+        pc += opsize;                                   \
+        DO_UPDATE_INSTRUCTION_COUNT(opcode);            \
+        DEBUGGER_SINGLE_STEP_NOTIFY();                  \
+        goto do_continue;                               \
+    }
+#endif /* PREFETCH_OPCCODE */
+#endif /* USELABELS */
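+
+// For orientation: after macro expansion, a typical binary-op handler further
+// down (e.g. iadd, generated by OPC_INT_BINARY) boils down to
+//
+//   CASE(_iadd):
+//       SET_STACK_INT(VMintAdd(STACK_INT(-2), STACK_INT(-1)), -2); // fold two slots into one
+//       UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);                     // 1-byte opcode, net pop of one slot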
+
+// About to call a new method: save the adjusted pc and return to the frame manager
+#define UPDATE_PC_AND_RETURN(opsize)  \
+   DECACHE_TOS();                     \
+   istate->set_bcp(pc+opsize);        \
+   return;
+
+
+#define METHOD istate->method()
+#define INVOCATION_COUNT METHOD->invocation_counter()
+#define BACKEDGE_COUNT METHOD->backedge_counter()
+
+#ifdef CORE
+#define INCR_INVOCATION_COUNT 
+
+// Allow a safepoint on backedges
+#define DO_BACKEDGE_CHECKS(skip, branch_pc)                                \
+    if ((skip) <= 0) {                                                     \
+      SAFEPOINT;                                                           \
+    }
+
+#else /* ! CORE */
+
+
+#define INCR_INVOCATION_COUNT INVOCATION_COUNT->increment()
+#define OSR_REQUEST(res, branch_pc) \
+            CALL_VM(res=InterpreterRuntime::frequency_counter_overflow(THREAD, branch_pc), handle_exception);
+/*
+ * For those opcodes that need to have a GC point on a backwards branch
+ */
+
+// Backedge counting is kind of strange. The asm interpreter will increment
+// the backedge counter as a separate counter but it does its comparisons
+// against the (scaled) sum of the invocation counter and backedge count to
+// make a decision. Seems kind of odd to sum them together like that.
+
+// skip is delta from current bcp/bci for target, branch_pc is pre-branch bcp
+
+
+#define DO_BACKEDGE_CHECKS(skip, branch_pc)                                                         \
+    if ((skip) <= 0) {                                                                              \
+      if (UseCompiler && UseLoopCounter) {                                                          \
+        bool do_OSR = UseOnStackReplacement;                                                        \
+        BACKEDGE_COUNT->increment();                                                                \
+        if (do_OSR) do_OSR = BACKEDGE_COUNT->reached_InvocationLimit();                             \
+        if (do_OSR) {                                                                               \
+          nmethod*  osr_nmethod;                                                                    \
+          OSR_REQUEST(osr_nmethod, branch_pc);                                                      \
+          if (osr_nmethod->osr_entry_bci() != InvalidOSREntryBci) {                                 \
+            intptr_t* buf;                                                                          \
+            CALL_VM(buf=SharedRuntime::OSR_migration_begin(THREAD), handle_exception);              \
+            istate->set_msg(do_osr);                                                                \
+            istate->set_osr_buf((address)buf);                                                      \
+            istate->set_osr_entry(osr_nmethod->osr_entry());                                        \
+            return;                                                                                 \
+          }                                                                                         \
+        } else {                                                                                    \
+          INCR_INVOCATION_COUNT;                                                                    \
+          SAFEPOINT;                                                                                \
+        }                                                                                           \
+      }  /* UseCompiler ... */                                                                      \
+      INCR_INVOCATION_COUNT;                                                                        \
+      SAFEPOINT;                                                                                    \
+    }
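+
+// In short: only backward branches (skip <= 0) are checked here.  With the
+// compiler enabled, once the backedge counter trips its limit the interpreter
+// asks the runtime for an OSR nmethod; if one with a valid entry bci comes
+// back, the locals are migrated into an OSR buffer and control returns to the
+// frame manager.  In every other case the invocation count is bumped and the
+// safepoint check is taken.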
+
+#endif /* ! CORE */
+/*
+ * Macros for accessing the stack.
+ */
+#undef STACK_INT
+#undef STACK_FLOAT
+#undef STACK_OBJECT
+#undef STACK_DOUBLE
+#undef STACK_LONG
+// JavaStack Implementation
+
+#define STACK_SLOT(offset)       stack_slot(topOfStack, (offset))
+#define STACK_INT(offset)        stack_int(topOfStack, (offset))
+#define STACK_FLOAT(offset)      stack_float(topOfStack, (offset))
+#define STACK_OBJECT(offset)     stack_object(topOfStack, (offset))
+#define STACK_DOUBLE(offset)     stack_double(topOfStack, (offset))
+#define STACK_LONG(offset)       stack_long(topOfStack, (offset))
+
+
+#define SET_STACK_SLOT(value, offset)            \
+        set_stack_slot(topOfStack, (value), (offset))
+#define SET_STACK_INT(value, offset)             \
+        set_stack_int(topOfStack, (value), (offset))
+#define SET_STACK_FLOAT(value, offset)           \
+        set_stack_float(topOfStack, (value), (offset))
+#define SET_STACK_OBJECT(value, offset)          \
+        set_stack_object(topOfStack, (value), (offset))
+#define SET_STACK_DOUBLE(value, offset)          \
+        set_stack_double(topOfStack, (value), (offset))
+#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) \
+        set_stack_double_from_addr(topOfStack, (addr), (offset))
+#define SET_STACK_LONG(value, offset)            \
+        set_stack_long(topOfStack, (value), (offset))
+#define SET_STACK_LONG_FROM_ADDR(addr, offset)   \
+        set_stack_long_from_addr(topOfStack, (addr), (offset))
+
+#define LOCALS_SLOT(offset)      locals_slot(locals, (offset))
+#define LOCALS_INT(offset)       locals_int(locals,  (offset))
+#define LOCALS_FLOAT(offset)     locals_float(locals, (offset))
+#define LOCALS_OBJECT(offset)    locals_object(locals, (offset))
+#define LOCALS_DOUBLE(offset)    locals_double(locals, (offset))
+#define LOCALS_LONG(offset)      locals_long(locals, (offset))
+#define LOCALS_LONG_AT(offset)   locals_long_at(locals, (offset))
+#define LOCALS_DOUBLE_AT(offset) locals_double_at(locals, (offset))
+
+
+#define SET_LOCALS_SLOT(value, offset)            \
+        set_locals_slot(locals, (value), (offset))
+#define SET_LOCALS_INT(value, offset)             \
+        set_locals_int(locals, (value), (offset))
+#define SET_LOCALS_FLOAT(value, offset)           \
+        set_locals_float(locals, (value), (offset))
+#define SET_LOCALS_OBJECT(value, offset)          \
+        set_locals_object(locals, (value), (offset))
+#define SET_LOCALS_DOUBLE(value, offset)          \
+        set_locals_double(locals, (value), (offset))
+#define SET_LOCALS_LONG(value, offset)            \
+        set_locals_long(locals, (value), (offset))
+#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) \
+        set_locals_double_from_addr(locals, (addr), (offset))
+#define SET_LOCALS_LONG_FROM_ADDR(addr, offset)   \
+        set_locals_long_from_addr(locals, (addr), (offset))
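+
+// Slot layout reminder: int/float/object values occupy a single stack or
+// locals slot, while long/double values span two slots.  That is why the
+// *_LONG and *_DOUBLE accessors above work on a slot pair and why the
+// handlers adjust topOfStack by 2 for those types.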
+
+
+/*
+ * Macros for caching and flushing the interpreter state. Some local
+ * variables need to be flushed out to the frame before we do certain
+ * things (like pushing frames or becoming gc safe) and some need to
+ * be recached later (like after popping a frame). We could use one
+ * macro to cache or decache everything, but this would be less than
+ * optimal because we don't always need to cache or decache everything;
+ * some things we know are already cached or decached.
+ */
+#undef DECACHE_TOS
+#undef CACHE_TOS
+#undef CACHE_PREV_TOS
+#define DECACHE_TOS()    istate->set_stack(topOfStack);
+
+#define CACHE_TOS()      topOfStack = (intptr_t *)istate->stack();
+
+#undef DECACHE_PC
+#undef CACHE_PC
+#define DECACHE_PC()    istate->set_bcp(pc);
+#define CACHE_PC()      pc = istate->bcp();
+#define CACHE_CP()      cp = istate->constants();
+#define CACHE_LOCALS()  locals = istate->locals();
+#undef CACHE_FRAME
+#define CACHE_FRAME()   
+ 
+/*
+ * CHECK_NULL - Macro for throwing a NullPointerException if the object
+ * passed is a null ref.
+ * On some architectures/platforms it should be possible to do this implicitly
+ */
+#undef CHECK_NULL
+#define CHECK_NULL(obj_)                                                 \
+    if ((obj_) == 0) {                                                   \
+        VM_JAVA_ERROR(vmSymbols::java_lang_NullPointerException(), "");  \
+    }
+
+#define VMdoubleConstZero() 0.0
+#define VMdoubleConstOne() 1.0
+#define VMlongConstZero() (max_jlong-max_jlong)
+#define VMlongConstOne() ((max_jlong-max_jlong)+1)
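+// The arithmetic forms above are presumably there to obtain constants of the
+// correct jlong type without depending on 64-bit literal suffixes:
+// (max_jlong - max_jlong) is a jlong-typed zero, and adding 1 to it yields a
+// jlong-typed one.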
+
+/*
+ * Alignment
+ */
+/* #define VMalignWordUp(val)          (((juint)(val) + 3) & ~3) */
+#define VMalignWordUp(val)          (((uintptr_t)(val) + 3) & ~3)
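+// Example: VMalignWordUp(5) == 8 and VMalignWordUp(8) == 8, i.e. the value is
+// rounded up to a multiple of 4 bytes.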
+
+// Decache the interpreter state that interpreter modifies directly (i.e. GC is indirect mod)
+#define DECACHE_STATE() DECACHE_PC(); DECACHE_TOS();
+
+// Reload interpreter state after calling the VM or a possible GC
+#define CACHE_STATE()   \
+        CACHE_TOS();    \
+        CACHE_PC();     \
+        CACHE_CP();     \
+        CACHE_LOCALS();
+
+// Call the VM don't check for pending exceptions
+#define CALL_VM_NOCHECK(func)                                     \
+          DECACHE_STATE();                                        \
+          SET_LAST_JAVA_FRAME();                                  \
+          func;                                                   \
+          RESET_LAST_JAVA_FRAME();                                \
+          CACHE_STATE();                                          \
+          if (THREAD->pop_frame_pending() &&                      \
+              !THREAD->pop_frame_in_process()) {                  \
+            goto handle_Pop_Frame;                                \
+          }
+
+// Call the VM and check for pending exceptions
+#define CALL_VM(func, label) {                                    \
+          CALL_VM_NOCHECK(func);                                  \
+          if (THREAD->pending_exception()) goto label;            \
+        }
+
+/*
+ * cInterpreter::InterpretMethod(interpreterState istate)
+ * cInterpreter::InterpretMethodWithChecks(interpreterState istate)
+ *
+ * The real deal. This is where byte codes actually get interpreted.
+ * Basically it's a big while loop that iterates until we return from
+ * the method passed in.
+ *
+ * The InterpretMethodWithChecks is used if JVMTI or JVMPI are enabled.
+ *
+ */
+#if defined(VM_JVMTI) || defined(VM_JVMPI)
+void
+cInterpreter::InterpretMethodWithChecks(interpreterState istate) {
+#else
+void
+cInterpreter::InterpretMethod(interpreterState istate) {
+#endif
+
+  // In order to simplify some tests based on switches set at runtime,
+  // we invoke the interpreter a single time after the switches are enabled
+  // and set simpler-to-test variables rather than using method calls or
+  // complex boolean expressions.
+
+  static int initialized = 0;
+#ifdef VM_JVMTI
+  static bool _jvmti_interp_events = 0;
+#endif
+
+#ifndef CORE
+  static int _compiling;  // (UseCompiler || CountCompiledCalls)
+#endif
+
+#ifdef ASSERT
+  // Verify linkages.
+  interpreterState l = istate;
+  do {
+    assert(l == l->_self_link, "bad link");
+    l = l->_prev_link;
+  } while (l != NULL);
+  // Screwups with stack management usually cause us to overwrite istate,
+  // so save a copy so we can verify it.
+  interpreterState orig = istate;
+#endif
+
+  static volatile jbyte* _byte_map_base; // adjusted card table base for oop store barrier
+
+  register intptr_t*        topOfStack = (intptr_t *)istate->stack(); /* access with STACK macros */
+  register address          pc = istate->bcp();
+  register jubyte opcode;
+  register intptr_t*        locals = istate->locals();
+  register constantPoolCacheOop  cp = istate->constants(); // method()->constants()->cache()
+#ifdef LOTS_OF_REGS
+  register JavaThread*      THREAD = istate->thread();
+  register volatile jbyte*  BYTE_MAP_BASE = _byte_map_base;
+#else
+#undef THREAD
+#define THREAD istate->thread()
+#undef BYTE_MAP_BASE
+#define BYTE_MAP_BASE _byte_map_base
+#endif
+
+#ifdef USELABELS
+  const static void* const opclabels_data[256] = { 
+/* 0x00 */ &&opc_nop,     &&opc_aconst_null,&&opc_iconst_m1,&&opc_iconst_0,
+/* 0x04 */ &&opc_iconst_1,&&opc_iconst_2,   &&opc_iconst_3, &&opc_iconst_4,
+/* 0x08 */ &&opc_iconst_5,&&opc_lconst_0,   &&opc_lconst_1, &&opc_fconst_0,
+/* 0x0C */ &&opc_fconst_1,&&opc_fconst_2,   &&opc_dconst_0, &&opc_dconst_1,
+
+/* 0x10 */ &&opc_bipush, &&opc_sipush, &&opc_ldc,    &&opc_ldc_w,
+/* 0x14 */ &&opc_ldc2_w, &&opc_iload,  &&opc_lload,  &&opc_fload,
+/* 0x18 */ &&opc_dload,  &&opc_aload,  &&opc_iload_0,&&opc_iload_1,
+/* 0x1C */ &&opc_iload_2,&&opc_iload_3,&&opc_lload_0,&&opc_lload_1,
+
+/* 0x20 */ &&opc_lload_2,&&opc_lload_3,&&opc_fload_0,&&opc_fload_1,
+/* 0x24 */ &&opc_fload_2,&&opc_fload_3,&&opc_dload_0,&&opc_dload_1,
+/* 0x28 */ &&opc_dload_2,&&opc_dload_3,&&opc_aload_0,&&opc_aload_1,
+/* 0x2C */ &&opc_aload_2,&&opc_aload_3,&&opc_iaload, &&opc_laload,
+
+/* 0x30 */ &&opc_faload,  &&opc_daload,  &&opc_aaload,  &&opc_baload,
+/* 0x34 */ &&opc_caload,  &&opc_saload,  &&opc_istore,  &&opc_lstore,
+/* 0x38 */ &&opc_fstore,  &&opc_dstore,  &&opc_astore,  &&opc_istore_0,
+/* 0x3C */ &&opc_istore_1,&&opc_istore_2,&&opc_istore_3,&&opc_lstore_0,
+
+/* 0x40 */ &&opc_lstore_1,&&opc_lstore_2,&&opc_lstore_3,&&opc_fstore_0,
+/* 0x44 */ &&opc_fstore_1,&&opc_fstore_2,&&opc_fstore_3,&&opc_dstore_0,
+/* 0x48 */ &&opc_dstore_1,&&opc_dstore_2,&&opc_dstore_3,&&opc_astore_0,
+/* 0x4C */ &&opc_astore_1,&&opc_astore_2,&&opc_astore_3,&&opc_iastore,
+
+/* 0x50 */ &&opc_lastore,&&opc_fastore,&&opc_dastore,&&opc_aastore,
+/* 0x54 */ &&opc_bastore,&&opc_castore,&&opc_sastore,&&opc_pop,
+/* 0x58 */ &&opc_pop2,   &&opc_dup,    &&opc_dup_x1, &&opc_dup_x2,
+/* 0x5C */ &&opc_dup2,   &&opc_dup2_x1,&&opc_dup2_x2,&&opc_swap,
+
+/* 0x60 */ &&opc_iadd,&&opc_ladd,&&opc_fadd,&&opc_dadd,
+/* 0x64 */ &&opc_isub,&&opc_lsub,&&opc_fsub,&&opc_dsub,
+/* 0x68 */ &&opc_imul,&&opc_lmul,&&opc_fmul,&&opc_dmul,
+/* 0x6C */ &&opc_idiv,&&opc_ldiv,&&opc_fdiv,&&opc_ddiv,
+
+/* 0x70 */ &&opc_irem, &&opc_lrem, &&opc_frem,&&opc_drem,
+/* 0x74 */ &&opc_ineg, &&opc_lneg, &&opc_fneg,&&opc_dneg,
+/* 0x78 */ &&opc_ishl, &&opc_lshl, &&opc_ishr,&&opc_lshr,
+/* 0x7C */ &&opc_iushr,&&opc_lushr,&&opc_iand,&&opc_land,
+
+/* 0x80 */ &&opc_ior, &&opc_lor,&&opc_ixor,&&opc_lxor,
+/* 0x84 */ &&opc_iinc,&&opc_i2l,&&opc_i2f, &&opc_i2d,
+/* 0x88 */ &&opc_l2i, &&opc_l2f,&&opc_l2d, &&opc_f2i,
+/* 0x8C */ &&opc_f2l, &&opc_f2d,&&opc_d2i, &&opc_d2l,
+
+/* 0x90 */ &&opc_d2f,  &&opc_i2b,  &&opc_i2c,  &&opc_i2s,
+/* 0x94 */ &&opc_lcmp, &&opc_fcmpl,&&opc_fcmpg,&&opc_dcmpl,
+/* 0x98 */ &&opc_dcmpg,&&opc_ifeq, &&opc_ifne, &&opc_iflt,
+/* 0x9C */ &&opc_ifge, &&opc_ifgt, &&opc_ifle, &&opc_if_icmpeq,
+
+/* 0xA0 */ &&opc_if_icmpne,&&opc_if_icmplt,&&opc_if_icmpge,  &&opc_if_icmpgt,
+/* 0xA4 */ &&opc_if_icmple,&&opc_if_acmpeq,&&opc_if_acmpne,  &&opc_goto,
+/* 0xA8 */ &&opc_jsr,      &&opc_ret,      &&opc_tableswitch,&&opc_lookupswitch,
+/* 0xAC */ &&opc_ireturn,  &&opc_lreturn,  &&opc_freturn,    &&opc_dreturn,
+
+/* 0xB0 */ &&opc_areturn,     &&opc_return,         &&opc_getstatic,    &&opc_putstatic,
+/* 0xB4 */ &&opc_getfield,    &&opc_putfield,       &&opc_invokevirtual,&&opc_invokespecial,
+/* 0xB8 */ &&opc_invokestatic,&&opc_invokeinterface,NULL,               &&opc_new,
+/* 0xBC */ &&opc_newarray,    &&opc_anewarray,      &&opc_arraylength,  &&opc_athrow,
+
+/* 0xC0 */ &&opc_checkcast,   &&opc_instanceof,     &&opc_monitorenter, &&opc_monitorexit,
+/* 0xC4 */ &&opc_wide,        &&opc_multianewarray, &&opc_ifnull,       &&opc_ifnonnull,
+/* 0xC8 */ &&opc_goto_w,      &&opc_jsr_w,          &&opc_breakpoint,   &&opc_fast_igetfield,
+/* 0xCC */ &&opc_fast_agetfield,&&opc_fast_aload_0,  &&opc_fast_iaccess_0, &&opc_fast_aaccess_0,
+
+/* 0xD0 */ &&opc_fast_linearswitch, &&opc_fast_binaryswitch, &&opc_return_register_finalizer,      &&opc_default,
+/* 0xD4 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xD8 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xDC */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+
+/* 0xE0 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xE4 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xE8 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xEC */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+
+/* 0xF0 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xF4 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xF8 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xFC */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default
+  };
+  register uintptr_t *dispatch_table = (uintptr_t*)&opclabels_data[0];
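+  // dispatch_table is indexed directly by the unsigned opcode byte; opcode
+  // values this interpreter does not implement land on opc_default, and the
+  // lone NULL slot at 0xBA is for a bytecode that is presumably never
+  // dispatched here.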
+#endif /* USELABELS */
+
+#ifdef ASSERT
+  // this will trigger a VERIFY_OOP on entry
+  if (istate->msg() != initialize && ! istate->method()->is_static()) {
+    oop rcvr = LOCALS_OBJECT(0);
+  }
+#endif
+
+  /* QQQ this should be a stack method so we don't need to know the actual stack direction */
+  assert(istate->msg() == initialize ||
+         topOfStack >= istate->stack_limit() &&
+         topOfStack < istate->stack_base(), 
+         "Stack top out of range");
+
+  switch (istate->msg()) {
+    case initialize: {
+      if (initialized++) ShouldNotReachHere(); // Only one initialize call
+#ifndef CORE
+      _compiling = (UseCompiler || CountCompiledCalls);
+#endif
+#ifdef VM_JVMTI
+      _jvmti_interp_events = JvmtiExport::can_post_interpreter_events();
+#endif
+      BarrierSet* bs = Universe::heap()->barrier_set();
+      assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+      _byte_map_base = (volatile jbyte*)(((CardTableModRefBS*)bs)->byte_map_base);
+      return;
+    }
+    break;
+    case method_entry: {
+      THREAD->set_do_not_unlock();
+      // count invocations
+      assert(initialized, "Interpreter not initialized");
+#ifndef CORE
+      if (_compiling) {
+        if (ProfileInterpreter) {
+          istate->method()->increment_interpreter_invocation_count();
+        }
+        INCR_INVOCATION_COUNT;
+        if (istate->method()->invocation_counter()->has_overflowed()) {
+            CALL_VM((void)InterpreterRuntime::frequency_counter_overflow(THREAD, NULL), handle_exception);
+
+            // We no longer retry on a counter overflow
+
+            // istate->set_msg(retry_method);
+            // THREAD->clr_do_not_unlock();
+            // return;
+        }
+        SAFEPOINT;
+      }
+#endif // !CORE
+
+#ifdef HACK
+      {
+        ResourceMark rm;
+        char *method_name = istate->method()->name_and_sig_as_C_string();
+        if (strstr(method_name, "SecurityManager$1") != NULL) os::breakpoint();
+      }
+#endif // HACK
+
+
+      // lock method if synchronized
+      if (istate->method()->is_synchronized()) {
+          // oop rcvr = locals[0].j.r;
+          oop rcvr;
+          if (istate->method()->is_static()) {
+            rcvr = istate->method()->constants()->pool_holder()->klass_part()->java_mirror();
+          } else {
+            rcvr = LOCALS_OBJECT(0);
+          }
+          // The initial monitor is ours for the taking
+          BasicObjectLock* mon = &istate->monitor_base()[-1];
+          oop monobj = mon->obj();
+          assert(mon->obj() == rcvr, "method monitor mis-initialized");
+
+          markOop displaced = rcvr->mark()->set_unlocked();
+          mon->lock()->set_displaced_header(displaced);
+          if (Atomic::cmpxchg_ptr(mon, rcvr->mark_addr(), displaced) != displaced) {
+            // Is it simple recursive case?
+            if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
+              mon->lock()->set_displaced_header(NULL);
+            } else {
+              CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
+            }
+          }
+      }
+      THREAD->clr_do_not_unlock();
+
+      // Notify jvmti/jvmpi
+#ifdef VM_JVMTI
+      if (_jvmti_interp_events) {
+        // Whenever JVMTI puts a thread in interp_only_mode, method
+        // entry/exit events are sent for that thread to track stack depth.  
+        if (THREAD->is_interp_only_mode()) {
+          CALL_VM(InterpreterRuntime::post_method_entry(THREAD), 
+                  handle_exception);
+        }
+      }
+#endif /* VM_JVMTI */
+      if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY ) == JVMPI_EVENT_ENABLED ||
+          *jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY2) == JVMPI_EVENT_ENABLED) {
+        oop rcvr;
+        if (istate->method()->is_static()) {
+          rcvr = NULL;
+        } else {
+          rcvr = LOCALS_OBJECT(0);
+        }
+        CALL_VM(SharedRuntime::jvmpi_method_entry(THREAD, istate->method(), 
+                rcvr), handle_exception);
+      }
+
+      goto run;
+    }
+
+    case popping_frame: {
+      // returned from a java call to pop the frame, restart the call
+      // clear the message so we don't confuse ourselves later
+      assert(THREAD->pop_frame_in_process(), "wrong frame pop state");
+      istate->set_msg(no_request);
+      THREAD->clr_pop_frame_in_process();
+      goto run;
+    }
+
+    case method_resume: {
+#ifdef HACK
+      {
+        ResourceMark rm;
+        char *method_name = istate->method()->name_and_sig_as_C_string();
+        if (strstr(method_name, "SecurityManager$1") != NULL) os::breakpoint();
+      }
+#endif // HACK
+      // returned from a java call, continue executing.
+      if (THREAD->pop_frame_pending() && !THREAD->pop_frame_in_process()) {
+        goto handle_Pop_Frame;
+      }
+ 
+      if (THREAD->has_pending_exception()) goto handle_exception;
+      // Update the pc by the saved amount of the invoke bytecode size
+      UPDATE_PC(istate->bcp_advance());
+      goto run;
+    }
+
+    case deopt_resume2: {
+      // Returned from an opcode that will reexecute. Deopt was
+      // a result of a PopFrame request.
+      //
+      goto run;
+    }
+
+    case deopt_resume: {
+      // Returned from an opcode that has completed. The stack has
+      // the result; all we need to do is skip across the bytecode
+      // and continue (assuming there is no exception pending).
+      // 
+      // compute continuation length
+      //
+      UPDATE_PC(Bytecodes::length_at(pc));
+      if (THREAD->has_pending_exception()) goto handle_exception;
+      goto run;
+    }
+    case got_monitors: {
+      // continue locking now that we have a monitor to use
+      // we expect to find the newly allocated monitor at the "top" of the monitor stack.
+      oop lockee = STACK_OBJECT(-1);
+      // dereferencing lockee ought to provoke an implicit null check
+      // find a free monitor
+      BasicObjectLock* entry = (BasicObjectLock*) istate->stack_base();
+      assert(entry->obj() == NULL, "Frame manager didn't allocate the monitor");
+      entry->set_obj(lockee);
+
+      markOop displaced = lockee->mark()->set_unlocked();
+      entry->lock()->set_displaced_header(displaced);
+      if (Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
+        // Is it simple recursive case?
+        if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
+          entry->lock()->set_displaced_header(NULL);
+        } else {
+          CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+        }
+      }
+      UPDATE_PC_AND_TOS(1, -1);
+      goto run;
+    }
+    default: {
+      fatal("Unexpected message from frame manager");
+    }
+  }
+
+run:
+
+  DO_UPDATE_INSTRUCTION_COUNT(*pc)
+  DEBUGGER_SINGLE_STEP_NOTIFY();
+#ifdef PREFETCH_OPCCODE
+  opcode = *pc;  /* prefetch first opcode */
+#endif
+
+#ifndef USELABELS
+  while (1)
+#endif
+  {
+#ifndef PREFETCH_OPCCODE
+      opcode = *pc;
+#endif
+      // Seems like this happens twice per opcode. At worst this is only
+      // needed at entry to the loop.
+      // DEBUGGER_SINGLE_STEP_NOTIFY();
+      /* Using this label avoids double breakpoints when quickening and
+       * when returning from transition frames.
+       */
+  opcode_switch:
+      assert(istate == orig, "Corrupted istate");
+      /* QQQ Hmm this has knowledge of direction, ought to be a stack method */
+      assert(topOfStack >= istate->stack_limit(), "Stack overrun");
+      assert(topOfStack < istate->stack_base(), "Stack underrun");
+
+#ifdef USELABELS
+      DISPATCH(opcode);
+#else
+      switch (opcode)
+#endif
+      {
+      CASE(_nop):
+          UPDATE_PC_AND_CONTINUE(1);
+
+          /* Push miscellaneous constants onto the stack. */
+
+      CASE(_aconst_null):
+          SET_STACK_OBJECT(NULL, 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+
+#undef  OPC_CONST_n
+#define OPC_CONST_n(opcode, const_type, value)                          \
+      CASE(opcode):                                                     \
+          SET_STACK_ ## const_type(value, 0);                           \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+
+          OPC_CONST_n(_iconst_m1,   INT,       -1);
+          OPC_CONST_n(_iconst_0,    INT,        0);
+          OPC_CONST_n(_iconst_1,    INT,        1);
+          OPC_CONST_n(_iconst_2,    INT,        2);
+          OPC_CONST_n(_iconst_3,    INT,        3);
+          OPC_CONST_n(_iconst_4,    INT,        4);
+          OPC_CONST_n(_iconst_5,    INT,        5);
+          OPC_CONST_n(_fconst_0,    FLOAT,      0.0);
+          OPC_CONST_n(_fconst_1,    FLOAT,      1.0);
+          OPC_CONST_n(_fconst_2,    FLOAT,      2.0);
+
+#if 0
+#undef  OPC_CONST2_n
+#define OPC_CONST2_n(opcname, value, key)                               \
+      CASE(_##opcname):                                                 \
+      {                                                                 \
+         VM##key##2Jvm(&STACK_INFO(DTOS(0)).raw,                        \
+             VM##key##Const##value());                                  \
+         UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);                          \
+      }
+#endif
+
+#undef  OPC_CONST2_n
+#define OPC_CONST2_n(opcname, value, key, kind)                         \
+      CASE(_##opcname):                                                 \
+      {                                                                 \
+          SET_STACK_ ## kind(VM##key##Const##value(), 1);               \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);                         \
+      }
+         OPC_CONST2_n(dconst_0, Zero, double, DOUBLE);
+         OPC_CONST2_n(dconst_1, One,  double, DOUBLE);
+         OPC_CONST2_n(lconst_0, Zero, long, LONG);
+         OPC_CONST2_n(lconst_1, One,  long, LONG);
+
+         /* Load constant from constant pool: */
+
+          /* Push a 1-byte signed integer value onto the stack. */
+      CASE(_bipush):
+          SET_STACK_INT((jbyte)(pc[1]), 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, 1);
+
+          /* Push a 2-byte signed integer constant onto the stack. */
+      CASE(_sipush):
+          SET_STACK_INT((int16_t)Bytes::get_Java_u2(pc + 1), 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(3, 1);
+
+          /* load from local variable */
+
+      CASE(_aload):
+          SET_STACK_OBJECT(LOCALS_OBJECT(pc[1]), 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, 1);
+
+      CASE(_iload):
+      CASE(_fload):
+          SET_STACK_SLOT(LOCALS_SLOT(pc[1]), 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, 1);
+
+      CASE(_lload):
+          SET_STACK_LONG_FROM_ADDR(LOCALS_LONG_AT(pc[1]), 1);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, 2);
+
+      CASE(_dload):
+          SET_STACK_DOUBLE_FROM_ADDR(LOCALS_DOUBLE_AT(pc[1]), 1);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, 2);
+
+#undef  OPC_LOAD_n
+#define OPC_LOAD_n(num)                                                 \
+      CASE(_aload_##num):                                               \
+          SET_STACK_OBJECT(LOCALS_OBJECT(num), 0);                      \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);                         \
+                                                                        \
+      CASE(_iload_##num):                                               \
+      CASE(_fload_##num):                                               \
+          SET_STACK_SLOT(LOCALS_SLOT(num), 0);                          \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);                         \
+                                                                        \
+      CASE(_lload_##num):                                               \
+          SET_STACK_LONG_FROM_ADDR(LOCALS_LONG_AT(num), 1);             \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);                         \
+      CASE(_dload_##num):                                               \
+          SET_STACK_DOUBLE_FROM_ADDR(LOCALS_DOUBLE_AT(num), 1);         \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+
+          OPC_LOAD_n(0);
+          OPC_LOAD_n(1);
+          OPC_LOAD_n(2);
+          OPC_LOAD_n(3);
+
+          /* store to a local variable */
+
+      CASE(_astore):
+          astore(topOfStack, -1, locals, pc[1]);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, -1);
+
+      CASE(_istore):
+      CASE(_fstore):
+          SET_LOCALS_SLOT(STACK_SLOT(-1), pc[1]);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, -1);
+
+      CASE(_lstore):
+          SET_LOCALS_LONG(STACK_LONG(-1), pc[1]);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, -2);
+
+      CASE(_dstore):
+          SET_LOCALS_DOUBLE(STACK_DOUBLE(-1), pc[1]);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(2, -2);
+
+      CASE(_wide): {
+          uint16_t reg = Bytes::get_Java_u2(pc + 2);
+
+          opcode = pc[1];
+          switch(opcode) {
+              case Bytecodes::_aload:
+                  SET_STACK_OBJECT(LOCALS_OBJECT(reg), 0); 
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, 1);
+
+              case Bytecodes::_iload:
+              case Bytecodes::_fload:
+                  SET_STACK_SLOT(LOCALS_SLOT(reg), 0); 
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, 1);
+
+              case Bytecodes::_lload:
+                  SET_STACK_LONG_FROM_ADDR(LOCALS_LONG_AT(reg), 1);
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, 2);
+
+              case Bytecodes::_dload:
+                  SET_STACK_DOUBLE_FROM_ADDR(LOCALS_LONG_AT(reg), 1);
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, 2);
+
+              case Bytecodes::_astore:
+                  astore(topOfStack, -1, locals, reg);
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, -1);
+
+              case Bytecodes::_istore:
+              case Bytecodes::_fstore:
+                  SET_LOCALS_SLOT(STACK_SLOT(-1), reg);
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, -1);
+
+              case Bytecodes::_lstore:
+                  SET_LOCALS_LONG(STACK_LONG(-1), reg);
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, -2);
+
+              case Bytecodes::_dstore:
+                  SET_LOCALS_DOUBLE(STACK_DOUBLE(-1), reg);
+                  UPDATE_PC_AND_TOS_AND_CONTINUE(4, -2);
+
+              case Bytecodes::_iinc: {
+                  int16_t offset = (int16_t)Bytes::get_Java_u2(pc+4); 
+                  // Be nice to see what this generates.... QQQ
+                  SET_LOCALS_INT(LOCALS_INT(reg) + offset, reg);
+                  UPDATE_PC_AND_CONTINUE(6);
+              }
+              case Bytecodes::_ret:
+                  pc = istate->method()->code_base() + (intptr_t)(LOCALS_SLOT(reg));
+                  UPDATE_PC_AND_CONTINUE(0);
+              default:
+                  VM_JAVA_ERROR(vmSymbols::java_lang_InternalError(), "undefined opcode");
+          }
+      }
+
+
+#undef  OPC_STORE_n
+#define OPC_STORE_n(num)                                                \
+      CASE(_astore_##num):                                              \
+          astore(topOfStack, -1, locals, num);                          \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);                        \
+      CASE(_istore_##num):                                              \
+      CASE(_fstore_##num):                                              \
+          SET_LOCALS_SLOT(STACK_SLOT(-1), num);                         \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
+
+          OPC_STORE_n(0);
+          OPC_STORE_n(1);
+          OPC_STORE_n(2);
+          OPC_STORE_n(3);
+
+#undef  OPC_DSTORE_n
+#define OPC_DSTORE_n(num)                                               \
+      CASE(_dstore_##num):                                              \
+          SET_LOCALS_DOUBLE(STACK_DOUBLE(-1), num);                     \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -2);                        \
+      CASE(_lstore_##num):                                              \
+          SET_LOCALS_LONG(STACK_LONG(-1), num);                         \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -2);
+
+          OPC_DSTORE_n(0);
+          OPC_DSTORE_n(1);
+          OPC_DSTORE_n(2);
+          OPC_DSTORE_n(3);
+
+          /* stack pop, dup, and insert opcodes */
+
+         
+      CASE(_pop):                /* Discard the top item on the stack */
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
+
+         
+      CASE(_pop2):               /* Discard the top 2 items on the stack */
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -2);
+
+          
+      CASE(_dup):               /* Duplicate the top item on the stack */
+          dup(topOfStack);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+
+      CASE(_dup2):              /* Duplicate the top 2 items on the stack */
+          dup2(topOfStack);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+
+      CASE(_dup_x1):    /* insert top word two down */
+          dup_x1(topOfStack);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+
+      CASE(_dup_x2):    /* insert top word three down  */
+          dup_x2(topOfStack);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+
+      CASE(_dup2_x1):   /* insert top 2 slots three down */
+          dup2_x1(topOfStack);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+
+      CASE(_dup2_x2):   /* insert top 2 slots four down */
+          dup2_x2(topOfStack);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+
+      CASE(_swap): {        /* swap top two elements on the stack */
+          swap(topOfStack);
+          UPDATE_PC_AND_CONTINUE(1);
+      }
+
+          /* Perform various binary integer operations */
+
+#undef  OPC_INT_BINARY 
+#define OPC_INT_BINARY(opcname, opname, test)                           \
+      CASE(_i##opcname):                                                \
+          if (test && (STACK_INT(-1) == 0)) {                           \
+              VM_JAVA_ERROR(vmSymbols::java_lang_ArithmeticException(), \
+                            "/ by int zero");                           \
+          }                                                             \
+          SET_STACK_INT(VMint##opname(STACK_INT(-2),                    \
+                                      STACK_INT(-1)),                   \
+                                      -2);                              \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);                        \
+      CASE(_l##opcname):                                                \
+      {                                                                 \
+          if (test) {                                                   \
+            jlong l1 = STACK_LONG(-1);                                  \
+            if (VMlongEqz(l1)) {                                        \
+              VM_JAVA_ERROR(vmSymbols::java_lang_ArithmeticException(), \
+                            "/ by long zero");                          \
+            }                                                           \
+          }                                                             \
+          /* First long at (-1,-2) next long at (-3,-4) */              \
+          SET_STACK_LONG(VMlong##opname(STACK_LONG(-3),                 \
+                                        STACK_LONG(-1)),                \
+                                        -3);                            \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -2);                        \
+      }
+
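+      // The `test' flag turns on the divide-by-zero check for the div/rem
+      // expansions. Long operands occupy two slots, so the second operand sits
+      // at (-1,-2), the first at (-3,-4), and the TOS drops by two.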
+      OPC_INT_BINARY(add, Add, 0);
+      OPC_INT_BINARY(sub, Sub, 0);
+      OPC_INT_BINARY(mul, Mul, 0);
+      OPC_INT_BINARY(and, And, 0);
+      OPC_INT_BINARY(or,  Or,  0);
+      OPC_INT_BINARY(xor, Xor, 0);
+      OPC_INT_BINARY(div, Div, 1);
+      OPC_INT_BINARY(rem, Rem, 1);
+
+
+      /* Perform various binary floating number operations */
+      /* On some machines/platforms/compilers the divide-by-zero check can be implicit */
+
+#undef  OPC_FLOAT_BINARY 
+#define OPC_FLOAT_BINARY(opcname, opname)                                  \
+      CASE(_d##opcname): {                                                 \
+          SET_STACK_DOUBLE(VMdouble##opname(STACK_DOUBLE(-3),              \
+                                            STACK_DOUBLE(-1)),             \
+                                            -3);                           \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -2);                           \
+      }                                                                    \
+      CASE(_f##opcname):                                                   \
+          SET_STACK_FLOAT(VMfloat##opname(STACK_FLOAT(-2),                 \
+                                          STACK_FLOAT(-1)),                \
+                                          -2);                             \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
+
+
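+     // Doubles take two slots, so the operands sit at -3 and -1 and the TOS
+     // drops by two; the float variants work on single slots.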
+     OPC_FLOAT_BINARY(add, Add);
+     OPC_FLOAT_BINARY(sub, Sub);
+     OPC_FLOAT_BINARY(mul, Mul);
+     OPC_FLOAT_BINARY(div, Div);
+     OPC_FLOAT_BINARY(rem, Rem);
+
+      /* Shift operations                                  
+       * Shift left int and long: ishl, lshl           
+       * Logical shift right int and long w/zero extension: iushr, lushr
+       * Arithmetic shift right int and long w/sign extension: ishr, lshr
+       */
+
+#undef  OPC_SHIFT_BINARY
+#define OPC_SHIFT_BINARY(opcname, opname)                               \
+      CASE(_i##opcname):                                                \
+         SET_STACK_INT(VMint##opname(STACK_INT(-2),                     \
+                                     STACK_INT(-1)),                    \
+                                     -2);                               \
+         UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);                         \
+      CASE(_l##opcname):                                                \
+      {                                                                 \
+         SET_STACK_LONG(VMlong##opname(STACK_LONG(-2),                  \
+                                       STACK_INT(-1)),                  \
+                                       -2);                             \
+         UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);                         \
+      }
+    
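+      // The shift count is a single int slot on top of the stack for both the
+      // int and the long variants, so the long shifts also pop just one slot.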
+      OPC_SHIFT_BINARY(shl, Shl);
+      OPC_SHIFT_BINARY(shr, Shr);
+      OPC_SHIFT_BINARY(ushr, Ushr);
+
+     /* Increment local variable by constant */ 
+      CASE(_iinc): 
+      {
+          // locals[pc[1]].j.i += (jbyte)(pc[2]);
+          SET_LOCALS_INT(LOCALS_INT(pc[1]) + (jbyte)(pc[2]), pc[1]);
+          UPDATE_PC_AND_CONTINUE(3);
+      }
+
+     /* negate the value on the top of the stack */
+
+      CASE(_ineg):
+         SET_STACK_INT(VMintNeg(STACK_INT(-1)), -1); 
+         UPDATE_PC_AND_CONTINUE(1);
+
+      CASE(_fneg):
+         SET_STACK_FLOAT(VMfloatNeg(STACK_FLOAT(-1)), -1); 
+         UPDATE_PC_AND_CONTINUE(1);
+
+      CASE(_lneg):
+      {
+         SET_STACK_LONG(VMlongNeg(STACK_LONG(-1)), -1); 
+         UPDATE_PC_AND_CONTINUE(1);
+      }
+
+      CASE(_dneg):
+      {
+         SET_STACK_DOUBLE(VMdoubleNeg(STACK_DOUBLE(-1)), -1); 
+         UPDATE_PC_AND_CONTINUE(1);
+      }
+
+      /* Conversion operations */
+
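+      // Widening to long/double pushes an extra slot (result stored at offset 1,
+      // TOS +2); narrowing from long/double pops a slot.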
+      CASE(_i2f):       /* convert top of stack int to float */
+         SET_STACK_FLOAT(VMint2Float(STACK_INT(-1)), -1);
+         UPDATE_PC_AND_CONTINUE(1);
+
+      CASE(_i2l):       /* convert top of stack int to long */
+      {
+          // this is ugly QQQ
+          jlong r = VMint2Long(STACK_INT(-1));
+          MORE_STACK(-1); // Pop
+          SET_STACK_LONG(r, 1);
+
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+      }
+
+      CASE(_i2d):       /* convert top of stack int to double */
+      {
+          // this is ugly QQQ (why cast to jlong?? )
+          jdouble r = (jlong)STACK_INT(-1);
+          MORE_STACK(-1); // Pop
+          SET_STACK_DOUBLE(r, 1);
+
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+      }
+
+      CASE(_l2i):       /* convert top of stack long to int */
+      {
+          jint r = VMlong2Int(STACK_LONG(-1));
+          MORE_STACK(-2); // Pop
+          SET_STACK_INT(r, 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+      }
+      
+      CASE(_l2f):   /* convert top of stack long to float */
+      {
+          jlong r = STACK_LONG(-1);
+          MORE_STACK(-2); // Pop
+          SET_STACK_FLOAT(VMlong2Float(r), 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+      }
+
+      CASE(_l2d):       /* convert top of stack long to double */
+      {
+          jlong r = STACK_LONG(-1);
+          MORE_STACK(-2); // Pop
+          SET_STACK_DOUBLE(VMlong2Double(r), 1);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+      }
+
+      CASE(_f2i):  /* Convert top of stack float to int */
+          SET_STACK_INT(SharedRuntime::f2i(STACK_FLOAT(-1)), -1); 
+          UPDATE_PC_AND_CONTINUE(1);
+
+      CASE(_f2l):  /* convert top of stack float to long */
+      {
+          jlong r = SharedRuntime::f2l(STACK_FLOAT(-1));
+          MORE_STACK(-1); // POP
+          SET_STACK_LONG(r, 1);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+      }
+
+      CASE(_f2d):  /* convert top of stack float to double */
+      {
+          jfloat f;
+          jdouble r;
+          f = STACK_FLOAT(-1);
+#ifdef IA64
+          // IA64 gcc bug
+          r = ( f == 0.0f ) ? (jdouble) f : (jdouble) f + ia64_double_zero;
+#else
+          r = (jdouble) f;
+#endif
+          MORE_STACK(-1); // POP
+          SET_STACK_DOUBLE(r, 1);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+      }
+
+      CASE(_d2i): /* convert top of stack double to int */
+      {
+          jint r1 = SharedRuntime::d2i(STACK_DOUBLE(-1));
+          MORE_STACK(-2);
+          SET_STACK_INT(r1, 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+      }
+
+      CASE(_d2f): /* convert top of stack double to float */
+      {
+          jfloat r1 = VMdouble2Float(STACK_DOUBLE(-1));
+          MORE_STACK(-2);
+          SET_STACK_FLOAT(r1, 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+      }
+
+      CASE(_d2l): /* convert top of stack double to long */
+      {
+          jlong r1 = SharedRuntime::d2l(STACK_DOUBLE(-1));
+          MORE_STACK(-2);
+          SET_STACK_LONG(r1, 1);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 2);
+      }
+
+      CASE(_i2b):
+          SET_STACK_INT(VMint2Byte(STACK_INT(-1)), -1);
+          UPDATE_PC_AND_CONTINUE(1);
+
+      CASE(_i2c):
+          SET_STACK_INT(VMint2Char(STACK_INT(-1)), -1);
+          UPDATE_PC_AND_CONTINUE(1);
+
+      CASE(_i2s):
+          SET_STACK_INT(VMint2Short(STACK_INT(-1)), -1);
+          UPDATE_PC_AND_CONTINUE(1);
+
+      /* comparison operators */
+
+
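+      // If the branch is taken, skip is the signed 16-bit offset read from the
+      // bytecode stream, otherwise 3 (the length of the if<cond> instruction);
+      // DO_BACKEDGE_CHECKS lets backward branches run safepoint/backedge bookkeeping.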
+#define COMPARISON_OP(name, comparison)                                      \
+      CASE(_if_icmp##name): {                                                \
+          int skip = (STACK_INT(-2) comparison STACK_INT(-1))                \
+                      ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
+          address branch_pc = pc;                                            \
+          UPDATE_PC_AND_TOS(skip, -2);                                       \
+          DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
+          CONTINUE;                                                          \
+      }                                                                      \
+      CASE(_if##name): {                                                     \
+          int skip = (STACK_INT(-1) comparison 0)                            \
+                      ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
+          address branch_pc = pc;                                            \
+          UPDATE_PC_AND_TOS(skip, -1);                                       \
+          DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
+          CONTINUE;                                                          \
+      }
+
+#define COMPARISON_OP2(name, comparison)                                     \
+      COMPARISON_OP(name, comparison)                                        \
+      CASE(_if_acmp##name): {                                                \
+          int skip = (STACK_OBJECT(-2) comparison STACK_OBJECT(-1))          \
+                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;            \
+          address branch_pc = pc;                                            \
+          UPDATE_PC_AND_TOS(skip, -2);                                       \
+          DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
+          CONTINUE;                                                          \
+      }
+
+#define NULL_COMPARISON_NOT_OP(name)                                         \
+      CASE(_if##name): {                                                     \
+          int skip = (!(STACK_OBJECT(-1) == 0))                              \
+                      ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
+          address branch_pc = pc;                                            \
+          UPDATE_PC_AND_TOS(skip, -1);                                       \
+          DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
+          CONTINUE;                                                          \
+      }
+
+#define NULL_COMPARISON_OP(name)                                             \
+      CASE(_if##name): {                                                     \
+          int skip = ((STACK_OBJECT(-1) == 0))                               \
+                      ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
+          address branch_pc = pc;                                            \
+          UPDATE_PC_AND_TOS(skip, -1);                                       \
+          DO_BACKEDGE_CHECKS(skip, branch_pc);                               \
+          CONTINUE;                                                          \
+      }
+      COMPARISON_OP(lt, <);
+      COMPARISON_OP(gt, >);
+      COMPARISON_OP(le, <=);
+      COMPARISON_OP(ge, >=);
+      COMPARISON_OP2(eq, ==);  /* include ref comparison */
+      COMPARISON_OP2(ne, !=);  /* include ref comparison */
+      NULL_COMPARISON_OP(null);
+      NULL_COMPARISON_NOT_OP(nonnull);
+
+      /* Goto pc at specified offset in switch table. */
+
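+      // Layout after word alignment: lpc[0] = default offset, lpc[1] = low,
+      // lpc[2] = high, lpc[3..] = jump offsets; a key outside [low, high]
+      // takes the default.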
+      CASE(_tableswitch): {
+          jint* lpc  = (jint*)VMalignWordUp(pc+1);
+          int32_t  key  = STACK_INT(-1);
+          int32_t  low  = Bytes::get_Java_u4((address)&lpc[1]);
+          int32_t  high = Bytes::get_Java_u4((address)&lpc[2]);
+          int32_t  skip;
+          key -= low;
+          skip = ((uint32_t) key > (uint32_t)(high - low))
+                      ? Bytes::get_Java_u4((address)&lpc[0])
+                      : Bytes::get_Java_u4((address)&lpc[key + 3]);
+          // Does this really need a full backedge check (osr?)
+          address branch_pc = pc;
+          UPDATE_PC_AND_TOS(skip, -1);
+          DO_BACKEDGE_CHECKS(skip, branch_pc);
+          CONTINUE;
+      }
+
+      /* Goto pc whose table entry matches specified key */
+
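+      // Layout after word alignment: lpc[0] = default offset, lpc[1] = npairs,
+      // followed by npairs (match, offset) pairs searched linearly.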
+      CASE(_lookupswitch): {
+          jint* lpc  = (jint*)VMalignWordUp(pc+1);
+          int32_t  key  = STACK_INT(-1);
+          int32_t  skip = Bytes::get_Java_u4((address) lpc); /* default amount */
+          int32_t  npairs = Bytes::get_Java_u4((address) &lpc[1]);
+          while (--npairs >= 0) {
+              lpc += 2;
+              if (key == (int32_t)Bytes::get_Java_u4((address)lpc)) {
+                  skip = Bytes::get_Java_u4((address)&lpc[1]);
+                  break;
+              }
+          }
+          address branch_pc = pc;
+          UPDATE_PC_AND_TOS(skip, -1);
+          DO_BACKEDGE_CHECKS(skip, branch_pc);
+          CONTINUE;
+      }
+
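+      // The third argument to VMfloatCompare selects the result when either
+      // operand is NaN: -1 for fcmpl, +1 for fcmpg.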
+      CASE(_fcmpl):
+      CASE(_fcmpg):
+      {
+          SET_STACK_INT(VMfloatCompare(STACK_FLOAT(-2), 
+                                        STACK_FLOAT(-1), 
+                                        (opcode == Bytecodes::_fcmpl ? -1 : 1)),
+                        -2);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
+      }
+
+      CASE(_dcmpl):
+      CASE(_dcmpg):
+      {
+          int r = VMdoubleCompare(STACK_DOUBLE(-3),
+                                  STACK_DOUBLE(-1),
+                                  (opcode == Bytecodes::_dcmpl ? -1 : 1));
+          MORE_STACK(-4); // Pop 
+          SET_STACK_INT(r, 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+      }
+
+      CASE(_lcmp):
+      {
+          int r = VMlongCompare(STACK_LONG(-3), STACK_LONG(-1));
+          MORE_STACK(-4);
+          SET_STACK_INT(r, 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, 1);
+      }
+
+
+      /* Return from a method */
+
+      CASE(_areturn):
+      CASE(_ireturn):
+      CASE(_freturn):
+      {
+          // Allow a safepoint before returning to frame manager.
+          SAFEPOINT;
+
+          goto handle_return;
+      }
+
+      CASE(_lreturn):
+      CASE(_dreturn):
+      {
+          // Allow a safepoint before returning to frame manager.
+          SAFEPOINT;
+          goto handle_return;
+      }
+
+      CASE(_return_register_finalizer): {
+
+          oop rcvr = LOCALS_OBJECT(0);
+          if (rcvr->klass()->klass_part()->has_finalizer()) {
+            CALL_VM(InterpreterRuntime::register_finalizer(THREAD, rcvr), handle_exception);
+          }
+          goto handle_return;
+      }
+      CASE(_return): {
+
+          // Allow a safepoint before returning to frame manager.
+          SAFEPOINT;
+          goto handle_return;
+      }
+
+      /* Array access byte-codes */
+
+      /* Every array access byte-code starts out like this */
+#define ARRAY_INTRO(arrayOff)                                                  \
+      arrayOopDesc* arrObj = (arrayOopDesc*)STACK_OBJECT(arrayOff);            \
+      jint     index  = STACK_INT(arrayOff + 1);                               \
+      char message[jintAsStringSize];                                          \
+      CHECK_NULL(arrObj);                                                      \
+      if ((uint32_t)index >= (uint32_t)arrObj->length()) {                     \
+          sprintf(message, "%d", index);                                       \
+          VM_JAVA_ERROR(vmSymbols::java_lang_ArrayIndexOutOfBoundsException(), \
+                        message);                                              \
+      }                                                 
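+      // The single unsigned compare above covers both negative indices and
+      // indices >= length.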
+
+      /* 32-bit loads. These handle conversion from < 32-bit types */
+#define ARRAY_LOADTO32(T, T2, format, stackRes, extra)                                \
+      {                                                                               \
+          ARRAY_INTRO(-2);                                                            \
+          extra;                                                                      \
+          SET_ ## stackRes(*(T2 *)(((address) arrObj->base(T)) + index * sizeof(T2)), \
+                           -2);                                                       \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);                                      \
+      }
+
+      /* 64-bit loads */
+#define ARRAY_LOADTO64(T,T2, stackRes, extra)                                              \
+      {                                                                                    \
+          ARRAY_INTRO(-2);                                                                 \
+          SET_ ## stackRes(*(T2 *)(((address) arrObj->base(T)) + index * sizeof(T2)), -1); \
+          extra;                                                                           \
+          UPDATE_PC_AND_CONTINUE(1);                                            \
+      }
+
+      CASE(_iaload):
+          ARRAY_LOADTO32(T_INT, jint,   "%d",   STACK_INT, 0);
+      CASE(_faload):
+          ARRAY_LOADTO32(T_FLOAT, jfloat, "%f",   STACK_FLOAT, 0);
+      CASE(_aaload):
+          ARRAY_LOADTO32(T_OBJECT, oop,   INTPTR_FORMAT, STACK_OBJECT, 0);
+      CASE(_baload):
+          ARRAY_LOADTO32(T_BYTE, jbyte,  "%d",   STACK_INT, 0);
+      CASE(_caload):
+          ARRAY_LOADTO32(T_CHAR,  jchar, "%d",   STACK_INT, 0);
+      CASE(_saload):
+          ARRAY_LOADTO32(T_SHORT, jshort, "%d",   STACK_INT, 0);
+      CASE(_laload):
+          ARRAY_LOADTO64(T_LONG, jlong, STACK_LONG, 0);
+      CASE(_daload):
+          ARRAY_LOADTO64(T_DOUBLE, jdouble, STACK_DOUBLE, 0);
+
+      /* 32-bit stores. These handle conversion to < 32-bit types */
+#define ARRAY_STOREFROM32(T, T2, format, stackSrc, extra)                            \
+      {                                                                              \
+          ARRAY_INTRO(-3);                                                           \
+          extra;                                                                     \
+          *(T2 *)(((address) arrObj->base(T)) + index * sizeof(T2)) = stackSrc( -1); \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -3);                                     \
+      }
+
+      /* 64-bit stores */
+#define ARRAY_STOREFROM64(T, T2, stackSrc, extra)                                    \
+      {                                                                              \
+          ARRAY_INTRO(-4);                                                           \
+          extra;                                                                     \
+          *(T2 *)(((address) arrObj->base(T)) + index * sizeof(T2)) = stackSrc( -1); \
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -4);                                     \
+      }
+
+      CASE(_iastore):
+          ARRAY_STOREFROM32(T_INT, jint,   "%d",   STACK_INT, 0);
+      CASE(_fastore):
+          ARRAY_STOREFROM32(T_FLOAT, jfloat, "%f",   STACK_FLOAT, 0);
+      /*
+       * This one looks different because of the assignability check
+       */
+      CASE(_aastore): {
+          oop rhsObject = STACK_OBJECT(-1);
+          ARRAY_INTRO( -3);
+          // arrObj, index are set
+          if (rhsObject != NULL) {
+            /* Check assignability of rhsObject into arrObj */
+            klassOop rhsKlassOop = rhsObject->klass(); // EBX (subclass)
+            assert(arrObj->klass()->klass()->klass_part()->oop_is_objArrayKlass(), "Ack not an objArrayKlass");
+            klassOop elemKlassOop = ((objArrayKlass*) arrObj->klass()->klass_part())->element_klass(); // superklass EAX
+            //
+            // Check for compatibility. This check must not GC!!
+            // Seems way more expensive now that we must dispatch
+            //
+            if (rhsKlassOop != elemKlassOop && !rhsKlassOop->klass_part()->is_subtype_of(elemKlassOop)) { // ebx->is...
+              VM_JAVA_ERROR(vmSymbols::java_lang_ArrayStoreException(), "");
+            }
+          }
+          oop* elem_loc = (oop*)(((address) arrObj->base(T_OBJECT)) + index * sizeof(oop));
+          // *(oop*)(((address) arrObj->base(T_OBJECT)) + index * sizeof(oop)) = rhsObject;
+          *elem_loc = rhsObject;
+          // Mark the card
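+          // Shifting the element address right by card_shift indexes the card
+          // byte map; storing 0 marks the card dirty, and the release ordering
+          // keeps the reference store visible before the card mark.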
+          OrderAccess::release_store(&BYTE_MAP_BASE[(uintptr_t)elem_loc >> CardTableModRefBS::card_shift], 0);
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -3);
+      }
+      CASE(_bastore):
+          ARRAY_STOREFROM32(T_BYTE, jbyte,  "%d",   STACK_INT, 0);
+      CASE(_castore):
+          ARRAY_STOREFROM32(T_CHAR, jchar,  "%d",   STACK_INT, 0);
+      CASE(_sastore):
+          ARRAY_STOREFROM32(T_SHORT, jshort, "%d",   STACK_INT, 0);
+      CASE(_lastore):
+          ARRAY_STOREFROM64(T_LONG, jlong, STACK_LONG, 0);
+      CASE(_dastore):
+          ARRAY_STOREFROM64(T_DOUBLE, jdouble, STACK_DOUBLE, 0);
+
+      CASE(_arraylength):
+      {
+          arrayOopDesc *ary = (arrayOopDesc *) STACK_OBJECT(-1);
+          CHECK_NULL(ary);
+          SET_STACK_INT(ary->length(), -1);
+          UPDATE_PC_AND_CONTINUE(1);
+      }
+
+      /* monitorenter and monitorexit for locking/unlocking an object */
+
+      CASE(_monitorenter): {
+        oop lockee = STACK_OBJECT(-1);
+        // dereferencing lockee ought to provoke an implicit null check
+        CHECK_NULL(lockee);
+        // find a free monitor or one already allocated for this object
+        // if we find a matching object then we need a new monitor
+        // since this is a recursive enter
+        BasicObjectLock* limit = istate->monitor_base();
+        BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
+        BasicObjectLock* entry = NULL;
+        while (most_recent != limit ) {
+          if (most_recent->obj() == NULL) entry = most_recent;
+          else if (most_recent->obj() == lockee) break;
+          most_recent++;
+        }
+        if (entry != NULL) {
+          entry->set_obj(lockee);
+          markOop displaced = lockee->mark()->set_unlocked();
+          entry->lock()->set_displaced_header(displaced);
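+          // Try to CAS the BasicLock address into the object's mark word. On
+          // failure, a mark owned by the current thread means a recursive lock
+          // (recorded with a NULL displaced header); otherwise fall back to the runtime.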
+          if (Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
+            // Is it simple recursive case?
+            if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
+              entry->lock()->set_displaced_header(NULL);
+            } else {
+              CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
+            }
+          }
+          UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
+        } else {
+          istate->set_msg(more_monitors);
+          // HACK FIX LATER
+          // Why was this needed? Seems to be useless now
+          // istate->set_callee((methodOop) lockee);
+          UPDATE_PC_AND_RETURN(0); // Re-execute
+        }
+      }
+
+      CASE(_monitorexit): {
+        oop lockee = STACK_OBJECT(-1);
+        CHECK_NULL(lockee);
+        // dereferencing lockee ought to provoke an implicit null check
+        // find our monitor slot
+        BasicObjectLock* limit = istate->monitor_base();
+        BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
+        while (most_recent != limit ) {
+          if ((most_recent)->obj() == lockee) {
+            BasicLock* lock = most_recent->lock();
+            markOop header = lock->displaced_header();
+            most_recent->set_obj(NULL);
+            // If it isn't recursive we either must swap old header or call the runtime
+            if (header != NULL) {
+              if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
+                // restore object for the slow case
+                most_recent->set_obj(lockee);
+                CALL_VM(InterpreterRuntime::monitorexit(THREAD, most_recent), handle_exception);
+              }
+            }
+            UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
+          }
+          most_recent++;
+        }
+        // Need to throw illegal monitor state exception
+        CALL_VM(InterpreterRuntime::throw_illegal_monitor_state_exception(THREAD), handle_exception);
+        // Should never reach here...
+        assert(false, "Should have thrown illegal monitor exception");
+      }
+
+      /* All of the non-quick opcodes. */
+
+      /* -Set clobbersCpIndex true if the quickened opcode clobbers the
+       *  constant pool index in the instruction.
+       */
+      CASE(_getfield):
+      CASE(_getstatic):
+        {
+          u2 index;
+          ConstantPoolCacheEntry* cache;
+          index = Bytes::get_native_u2(pc+1);
+
+          // QQQ Need to make this as inlined as possible. Probably need to 
+          // split all the bytecode cases out so c++ compiler has a chance 
+          // for constant prop to fold everything possible away.
+
+          cache = cp->entry_at(index);
+          if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+            CALL_VM(InterpreterRuntime::resolve_get_put(THREAD, (Bytecodes::Code)opcode), 
+                    handle_exception);
+            cache = cp->entry_at(index);
+          }
+
+#ifdef VM_JVMTI
+          if (_jvmti_interp_events) {
+            int *count_addr;
+            oop obj;
+            // Check to see if a field modification watch has been set 
+            // before we take the time to call into the VM.
+            count_addr = (int *)JvmtiExport::get_field_access_count_addr();
+            if ( *count_addr > 0 ) {
+              if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) {
+                obj = (oop)NULL;
+              } else {
+                obj = (oop) STACK_OBJECT(-1);
+              }
+              CALL_VM(InterpreterRuntime::post_field_access(THREAD, 
+                                          obj, 
+                                          cache),
+                                          handle_exception);
+            }
+          }
+#endif /* VM_JVMTI */
+
+          oop obj;
+          if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) {
+            obj = (oop) cache->f1();
+            MORE_STACK(1);  // Assume single slot push
+          } else {
+            obj = (oop) STACK_OBJECT(-1);
+            CHECK_NULL(obj);
+          }
+
+          //
+          // Now store the result on the stack
+          //
+          TosState tos_type = cache->flag_state();
+          int field_offset = cache->f2();
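+          // Volatile fields use the acquire variants of the accessors; long and
+          // double results occupy two slots, hence the extra MORE_STACK(1).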
+          if (cache->is_volatile()) {
+            if (tos_type == atos) {
+              SET_STACK_OBJECT(obj->obj_field_acquire(field_offset), -1);
+            } else if (tos_type == itos) {
+              SET_STACK_INT(obj->int_field_acquire(field_offset), -1);
+            } else if (tos_type == ltos) {
+              SET_STACK_LONG(obj->long_field_acquire(field_offset), 0);
+              MORE_STACK(1);
+            } else if (tos_type == btos) {
+              SET_STACK_INT(obj->byte_field_acquire(field_offset), -1);
+            } else if (tos_type == ctos) {
+              SET_STACK_INT(obj->char_field_acquire(field_offset), -1);
+            } else if (tos_type == stos) {
+              SET_STACK_INT(obj->short_field_acquire(field_offset), -1);
+            } else if (tos_type == ftos) {
+              SET_STACK_FLOAT(obj->float_field_acquire(field_offset), -1);
+            } else {
+              SET_STACK_DOUBLE(obj->double_field_acquire(field_offset), 0);
+              MORE_STACK(1);
+            }
+          } else {
+            if (tos_type == atos) {
+              SET_STACK_OBJECT(obj->obj_field(field_offset), -1);
+            } else if (tos_type == itos) {
+              SET_STACK_INT(obj->int_field(field_offset), -1);
+            } else if (tos_type == ltos) {
+              SET_STACK_LONG(obj->long_field(field_offset), 0);
+              MORE_STACK(1);
+            } else if (tos_type == btos) {
+              SET_STACK_INT(obj->byte_field(field_offset), -1);
+            } else if (tos_type == ctos) {
+              SET_STACK_INT(obj->char_field(field_offset), -1);
+            } else if (tos_type == stos) {
+              SET_STACK_INT(obj->short_field(field_offset), -1);
+            } else if (tos_type == ftos) {
+              SET_STACK_FLOAT(obj->float_field(field_offset), -1);
+            } else {
+              SET_STACK_DOUBLE(obj->double_field(field_offset), 0);
+              MORE_STACK(1);
+            }
+          }
+
+          UPDATE_PC_AND_CONTINUE(3);
+         }
+
+      CASE(_putfield):
+      CASE(_putstatic):
+        {
+          u2 index = Bytes::get_native_u2(pc+1);
+          ConstantPoolCacheEntry* cache = cp->entry_at(index);
+          if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+            CALL_VM(InterpreterRuntime::resolve_get_put(THREAD, (Bytecodes::Code)opcode), 
+                    handle_exception);
+            cache = cp->entry_at(index);
+          }
+
+#ifdef VM_JVMTI
+          if (_jvmti_interp_events) {
+            int *count_addr;
+            oop obj;
+            // Check to see if a field modification watch has been set 
+            // before we take the time to call into the VM.
+            count_addr = (int *)JvmtiExport::get_field_modification_count_addr();
+            if ( *count_addr > 0 ) {
+              if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) {
+                obj = (oop)NULL;
+              }
+              else {
+                if (cache->is_long() || cache->is_double()) {
+                  obj = (oop) STACK_OBJECT(-3);
+                } else {
+                  obj = (oop) STACK_OBJECT(-2);
+                }
+              }
+
+              CALL_VM(InterpreterRuntime::post_field_modification(THREAD,
+                                          obj, 
+                                          cache, 
+                                          (jvalue *)STACK_SLOT(-1)),  
+                                          handle_exception);
+            }
+          }
+#endif /* VM_JVMTI */
+
+          // QQQ Need to make this as inlined as possible. Probably need to split all the bytecode cases
+          // out so c++ compiler has a chance for constant prop to fold everything possible away.
+
+          oop obj;
+          int count;
+          TosState tos_type = cache->flag_state();
+
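+          // count is the (negative) number of slots to pop: one for a one-slot
+          // value, two for long/double, plus one more for the receiver unless
+          // this is a putstatic.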
+          count = -1;
+          if (tos_type == ltos || tos_type == dtos) {
+            --count;
+          }
+          if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) {
+            obj = (oop) cache->f1();
+          } else {
+            --count;
+            obj = (oop) STACK_OBJECT(count);
+            CHECK_NULL(obj);
+          }
+
+          //
+          // Now store the result
+          //
+          int field_offset = cache->f2();
+          if (cache->is_volatile()) {
+            if (tos_type == itos) {
+              obj->release_int_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == atos) {
+              obj->release_obj_field_put(field_offset, STACK_OBJECT(-1));
+              OrderAccess::release_store(&BYTE_MAP_BASE[(uintptr_t)obj >> CardTableModRefBS::card_shift], 0);
+            } else if (tos_type == btos) {
+              obj->release_byte_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == ltos) {
+              obj->release_long_field_put(field_offset, STACK_LONG(-1));
+            } else if (tos_type == ctos) {
+              obj->release_char_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == stos) {
+              obj->release_short_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == ftos) {
+              obj->release_float_field_put(field_offset, STACK_FLOAT(-1));
+            } else {
+              obj->release_double_field_put(field_offset, STACK_DOUBLE(-1));
+            }
+            OrderAccess::storeload();
+          } else {
+            if (tos_type == itos) {
+              obj->int_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == atos) {
+              obj->obj_field_put(field_offset, STACK_OBJECT(-1));
+              OrderAccess::release_store(&BYTE_MAP_BASE[(uintptr_t)obj >> CardTableModRefBS::card_shift], 0);
+            } else if (tos_type == btos) {
+              obj->byte_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == ltos) {
+              obj->long_field_put(field_offset, STACK_LONG(-1));
+            } else if (tos_type == ctos) {
+              obj->char_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == stos) {
+              obj->short_field_put(field_offset, STACK_INT(-1));
+            } else if (tos_type == ftos) {
+              obj->float_field_put(field_offset, STACK_FLOAT(-1));
+            } else {
+              obj->double_field_put(field_offset, STACK_DOUBLE(-1));
+            }
+          }
+
+          UPDATE_PC_AND_TOS_AND_CONTINUE(3, count);
+        }  
+
+      CASE(_new): {
+        u2 index = Bytes::get_Java_u2(pc+1);
+        constantPoolOop constants = istate->method()->constants();
+        if (!constants->tag_at(index).is_unresolved_klass()) {
+          // Make sure klass is initialized and doesn't have a finalizer
+          oop entry = (klassOop) *constants->obj_at_addr(index);
+          assert(entry->is_klass(), "Should be resolved klass");
+          klassOop k_entry = (klassOop) entry;
+          assert(k_entry->klass_part()->oop_is_instance(), "Should be instanceKlass");
+          instanceKlass* ik = (instanceKlass*) k_entry->klass_part();
+          if ( ik->is_initialized() && ik->can_be_fastpath_allocated() ) {
+            size_t obj_size = ik->size_helper();
+            oop result = NULL;
+            bool need_zero = false;
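+            // Fast path: try the TLAB first, then CAS-bump the shared eden top.
+            // Only the eden path sets need_zero; the TLAB path is assumed here
+            // to hand back pre-zeroed memory.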
+            if (UseTLAB) {
+              result = (oop) THREAD->tlab().allocate(obj_size);
+            }
+            if (result == NULL) {
+              need_zero = true;
+              // Try allocate in shared eden
+        retry:
+              HeapWord* compare_to = *Universe::heap()->top_addr();
+              HeapWord* new_top = compare_to + obj_size;
+              if (new_top <= *Universe::heap()->end_addr()) {
+                if (Atomic::cmpxchg_ptr(new_top, Universe::heap()->top_addr(), compare_to) != compare_to) {
+                  goto retry;
+                }
+                result = (oop) compare_to;
+              }
+            }
+            if (result != NULL) {
+              // Initialize the object body (if it has nonzero size and needs zeroing) and then the header
+              if (need_zero ) {
+                HeapWord* to_zero = (HeapWord*) result + sizeof(oopDesc) / oopSize;
+                obj_size -= sizeof(oopDesc) / oopSize;
+                if (obj_size > 0 ) {
+                  memset(to_zero, 0, obj_size * HeapWordSize);
+                }
+              }
+              if (UseBiasedLocking) {
+                result->set_mark(ik->prototype_header());
+              } else {
+                result->set_mark(markOopDesc::prototype());
+              }
+              result->set_klass(k_entry);
+              SET_STACK_OBJECT(result, 0);
+              UPDATE_PC_AND_TOS_AND_CONTINUE(3, 1);
+            }
+          }
+        }
+        // Slow case allocation
+        CALL_VM(InterpreterRuntime::_new(THREAD, istate->method()->constants(), index),
+                handle_exception);
+        SET_STACK_OBJECT(THREAD->vm_result(), 0);
+        THREAD->set_vm_result(NULL);
+        UPDATE_PC_AND_TOS_AND_CONTINUE(3, 1);
+      }
+      CASE(_anewarray): {
+        u2 index = Bytes::get_Java_u2(pc+1);
+        jint size = STACK_INT(-1);
+        CALL_VM(InterpreterRuntime::anewarray(THREAD, istate->method()->constants(), index, size),
+                handle_exception);
+        SET_STACK_OBJECT(THREAD->vm_result(), -1);
+        THREAD->set_vm_result(NULL);
+        UPDATE_PC_AND_CONTINUE(3);
+      }
+      CASE(_multianewarray): {
+        jint dims = *(pc+3);
+        jint size = STACK_INT(-1);
+        // stack grows down, dimensions are up!
+        jint *dimarray =
+                   (jint*)&topOfStack[dims * Interpreter::stackElementWords()+ 
+                                      Interpreter::stackElementWords()-1];
+        // adjust pointer to the start of the stack element
+        CALL_VM(InterpreterRuntime::multianewarray(THREAD, dimarray),
+                handle_exception);
+        SET_STACK_OBJECT(THREAD->vm_result(), -dims);
+        THREAD->set_vm_result(NULL);
+        UPDATE_PC_AND_TOS_AND_CONTINUE(4, -(dims-1));
+      }
+      CASE(_checkcast):
+          if (STACK_OBJECT(-1) != NULL) {
+            u2 index = Bytes::get_Java_u2(pc+1);
+#ifndef CORE
+            if (ProfileInterpreter) {
+              // needs Profile_checkcast QQQ
+              ShouldNotReachHere();
+            }
+#endif
+            // Constant pool may have actual klass or unresolved klass. If it is
+            // unresolved we must resolve it
+            if (istate->method()->constants()->tag_at(index).is_unresolved_klass()) {
+              CALL_VM(InterpreterRuntime::quicken_io_cc(THREAD), handle_exception);
+            }
+            klassOop klassOf = (klassOop) *(istate->method()->constants()->obj_at_addr(index));
+            klassOop objKlassOop = STACK_OBJECT(-1)->klass(); //ebx
+            //
+            // Check for compatibility. This check must not GC!!
+            // Seems way more expensive now that we must dispatch
+            //
+            if (objKlassOop != klassOf && 
+                !objKlassOop->klass_part()->is_subtype_of(klassOf)) {
+              ResourceMark rm(THREAD);  
+              const char* objName = Klass::cast(objKlassOop)->external_name();
+              const char* klassName = Klass::cast(klassOf)->external_name();
+              char* message = SharedRuntime::generate_class_cast_message(
+                objName, klassName);
+              VM_JAVA_ERROR(vmSymbols::java_lang_ClassCastException(), message);
+            }
+          } else {
+            if (UncommonNullCast) {
+//              istate->method()->set_null_cast_seen();
+// [RGV] Not sure what to do here!
+              ShouldNotReachHere();
+
+            }
+          }
+          UPDATE_PC_AND_CONTINUE(3);
+
+      CASE(_instanceof):
+          if (STACK_OBJECT(-1) == NULL) {
+            SET_STACK_INT(0, -1);
+          } else {
+            u2 index = Bytes::get_Java_u2(pc+1);
+            // Constant pool may have actual klass or unresolved klass. If it is
+            // unresolved we must resolve it
+            if (istate->method()->constants()->tag_at(index).is_unresolved_klass()) {
+              CALL_VM(InterpreterRuntime::quicken_io_cc(THREAD), handle_exception);
+            }
+            klassOop klassOf = (klassOop) *(istate->method()->constants()->obj_at_addr(index));
+            klassOop objKlassOop = STACK_OBJECT(-1)->klass();
+            //
+            // Check for compatibility. This check must not GC!!
+            // Seems way more expensive now that we must dispatch
+            //
+            if ( objKlassOop == klassOf || objKlassOop->klass_part()->is_subtype_of(klassOf)) {
+              SET_STACK_INT(1, -1);
+            } else {
+              SET_STACK_INT(0, -1);
+            }
+          }
+          UPDATE_PC_AND_CONTINUE(3);
+
+      CASE(_ldc_w):
+      CASE(_ldc):
+        {
+          u2 index;
+          bool wide = false;
+          int incr = 2; // frequent case
+          if (opcode == Bytecodes::_ldc) {
+            index = pc[1];
+          } else {
+            index = Bytes::get_Java_u2(pc+1);
+            incr = 3;
+            wide = true;
+          }
+
+          constantPoolOop constants = istate->method()->constants();
+          switch (constants->tag_at(index).value()) {
+          case JVM_CONSTANT_Integer:
+            SET_STACK_INT(constants->int_at(index), 0);
+            break;
+
+          case JVM_CONSTANT_Float:
+            SET_STACK_FLOAT(constants->float_at(index), 0);
+            break;
+
+          case JVM_CONSTANT_String:
+            SET_STACK_OBJECT(constants->resolved_string_at(index), 0);
+            break;
+
+          case JVM_CONSTANT_Class:
+            SET_STACK_OBJECT(constants->resolved_klass_at(index)->klass_part()->java_mirror(), 0);
+            break;
+
+          case JVM_CONSTANT_UnresolvedString:
+          case JVM_CONSTANT_UnresolvedClass:
+          case JVM_CONSTANT_UnresolvedClassInError:
+            CALL_VM(InterpreterRuntime::ldc(THREAD, wide), handle_exception);
+            SET_STACK_OBJECT(THREAD->vm_result(), 0);
+            THREAD->set_vm_result(NULL);
+            break;
+
+#if 0
+          CASE(_fast_igetfield):
+          CASE(_fast_agetfield):
+          CASE(_fast_aload_0):
+          CASE(_fast_iaccess_0):
+          CASE(_fast_aaccess_0):
+          CASE(_fast_linearswitch):
+          CASE(_fast_binaryswitch):
+            fatal("unsupported fast bytecode");
+#endif
+
+          default:  ShouldNotReachHere();
+          }
+          UPDATE_PC_AND_TOS_AND_CONTINUE(incr, 1);
+        }
+
+      CASE(_ldc2_w):
+        {
+          u2 index = Bytes::get_Java_u2(pc+1);
+
+          constantPoolOop constants = istate->method()->constants();
+          switch (constants->tag_at(index).value()) {
+
+          case JVM_CONSTANT_Long:
+            SET_STACK_LONG(constants->long_at(index), 1);
+            break;
+
+          case JVM_CONSTANT_Double:
+            SET_STACK_DOUBLE(constants->double_at(index), 1);
+            break;
+          default:  ShouldNotReachHere();
+          }
+          UPDATE_PC_AND_TOS_AND_CONTINUE(3, 2);
+        }
+
+      CASE(_invokeinterface): {
+        u2 index = Bytes::get_native_u2(pc+1);
+
+        // QQQ Need to make this as inlined as possible. Probably need to split all the bytecode cases
+        // out so c++ compiler has a chance for constant prop to fold everything possible away.
+
+        ConstantPoolCacheEntry* cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+          CALL_VM(InterpreterRuntime::resolve_invoke(THREAD, (Bytecodes::Code)opcode), 
+                  handle_exception);
+          cache = cp->entry_at(index);
+        }
+
+        istate->set_msg(call_method);
+
+        // Special case of invokeinterface called for virtual method of
+        // java.lang.Object.  See cpCacheOop.cpp for details.
+        // This code isn't produced by javac, but could be produced by
+        // another compliant java compiler.
+        if (cache->is_methodInterface()) {
+          methodOop callee;
+          CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
+          if (cache->is_vfinal()) {
+            callee = (methodOop) cache->f2();
+          } else {
+            // get receiver
+            int parms = cache->parameter_size();
+            // Same comments as invokevirtual apply here
+            instanceKlass* rcvrKlass = (instanceKlass*)
+                                 STACK_OBJECT(-parms)->klass()->klass_part();
+            callee = (methodOop) rcvrKlass->start_of_vtable()[ cache->f2()];
+          }
+          istate->set_callee(callee);
+          istate->set_callee_entry_point(callee->from_interpreted_entry());
+#ifdef VM_JVMTI
+          if (JvmtiExport::can_post_interpreter_events() && THREAD->is_interp_only_mode()) {
+            istate->set_callee_entry_point(callee->interpreter_entry());
+          }
+#endif /* VM_JVMTI */
+          istate->set_bcp_advance(5);
+          UPDATE_PC_AND_RETURN(0); // I'll be back...
+        }
+
+        // this could definitely be cleaned up QQQ
+        methodOop callee;
+        klassOop iclass = (klassOop)cache->f1();
+        // instanceKlass* interface = (instanceKlass*) iclass->klass_part();
+        // get receiver
+        int parms = cache->parameter_size();
+        oop rcvr = STACK_OBJECT(-parms);
+        CHECK_NULL(rcvr); 
+        instanceKlass* int2 = (instanceKlass*) rcvr->klass()->klass_part();
+        itableOffsetEntry* ki = (itableOffsetEntry*) int2->start_of_itable();
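+        // Linear scan of the receiver's itable offset entries for the declared
+        // interface klass.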
+        int i;
+        for ( i = 0 ; i < int2->itable_length() ; i++, ki++ ) {
+          if (ki->interface_klass() == iclass) break;
+        }
+        // If the interface isn't found, this class doesn't implement this
+        // interface.  The link resolver checks this but only for the first
+        // time this interface is called.
+        if (i == int2->itable_length()) {
+          VM_JAVA_ERROR(vmSymbols::java_lang_IncompatibleClassChangeError(), "");
+        }
+        int mindex = cache->f2();
+        itableMethodEntry* im = ki->first_method_entry(rcvr->klass());
+        callee = im[mindex].method();
+        if (callee == NULL) {
+          VM_JAVA_ERROR(vmSymbols::java_lang_AbstractMethodError(), "");
+        }
+        
+        istate->set_callee(callee);
+        istate->set_callee_entry_point(callee->from_interpreted_entry());
+#ifdef VM_JVMTI
+        if (JvmtiExport::can_post_interpreter_events() && THREAD->is_interp_only_mode()) {
+          istate->set_callee_entry_point(callee->interpreter_entry());
+        }
+#endif /* VM_JVMTI */
+        istate->set_bcp_advance(5);
+        UPDATE_PC_AND_RETURN(0); // I'll be back...
+      }
+
+      CASE(_invokevirtual):
+      CASE(_invokespecial):
+      CASE(_invokestatic): {
+        u2 index = Bytes::get_native_u2(pc+1);
+
+        ConstantPoolCacheEntry* cache = cp->entry_at(index);
+        // QQQ Need to make this as inlined as possible. Probably need to split all the bytecode cases
+        // out so c++ compiler has a chance for constant prop to fold everything possible away.
+
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+          CALL_VM(InterpreterRuntime::resolve_invoke(THREAD, (Bytecodes::Code)opcode), 
+                  handle_exception);
+          cache = cp->entry_at(index);
+        }
+     
+        istate->set_msg(call_method);
+        {
+          methodOop callee;
+          if ((Bytecodes::Code)opcode == Bytecodes::_invokevirtual) {
+            CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
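+            // For a vfinal entry f2() holds the target methodOop directly;
+            // otherwise f2() is the vtable index used below.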
+            if (cache->is_vfinal()) callee = (methodOop) cache->f2();
+            else {
+              // get receiver
+              int parms = cache->parameter_size();
+              // this works but needs a resourcemark and seems to create a vtable on every call:
+              // methodOop callee = rcvr->klass()->klass_part()->vtable()->method_at(cache->f2());
+              // 
+              // this fails with an assert
+              // instanceKlass* rcvrKlass = instanceKlass::cast(STACK_OBJECT(-parms)->klass());
+              // but this works
+              instanceKlass* rcvrKlass = (instanceKlass*) STACK_OBJECT(-parms)->klass()->klass_part();
+              /*
+                Executing this code in java.lang.String:
+                    public String(char value[]) {
+                          this.count = value.length;
+                          this.value = (char[])value.clone();
+                     }
+
+                 a find on rcvr->klass()->klass_part() reports:
+                 {type array char}{type array class} 
+                  - klass: {other class}
+
+                  but using instanceKlass::cast(STACK_OBJECT(-parms)->klass()) causes an assertion failure
+                  because rcvr->klass()->klass_part()->oop_is_instance() == 0
+                  However it seems to have a vtable in the right location. Huh?
+
+              */
+              callee = (methodOop) rcvrKlass->start_of_vtable()[ cache->f2()];
+            }
+          } else {
+            if ((Bytecodes::Code)opcode == Bytecodes::_invokespecial) {
+              CHECK_NULL(STACK_OBJECT(-(cache->parameter_size())));
+            }
+            callee = (methodOop) cache->f1();
+          }
+
+          istate->set_callee(callee);
+          istate->set_callee_entry_point(callee->from_interpreted_entry());
+#ifdef VM_JVMTI
+          if (JvmtiExport::can_post_interpreter_events() && THREAD->is_interp_only_mode()) {
+            istate->set_callee_entry_point(callee->interpreter_entry());
+          }
+#endif /* VM_JVMTI */
+          istate->set_bcp_advance(3);
+          UPDATE_PC_AND_RETURN(0); // I'll be back...
+        }
+      }
+
+      /* Allocate memory for a new java object. */
+
+      CASE(_newarray): {
+        BasicType atype = (BasicType) *(pc+1);
+        jint size = STACK_INT(-1);
+        CALL_VM(InterpreterRuntime::newarray(THREAD, atype, size),
+                handle_exception);
+        SET_STACK_OBJECT(THREAD->vm_result(), -1);
+        THREAD->set_vm_result(NULL);
+
+        UPDATE_PC_AND_CONTINUE(2);
+      }
+
+      /* Throw an exception. */
+
+      CASE(_athrow): {
+          oop except_oop = STACK_OBJECT(-1);
+          CHECK_NULL(except_oop);
+          // set pending_exception so we use common code
+          THREAD->set_pending_exception(except_oop, NULL, 0);
+          goto handle_exception;
+      }
+
+      /* goto and jsr. They are exactly the same except jsr pushes
+       * the address of the next instruction first.
+       */
+
+      CASE(_jsr): {
+          /* push bytecode index on stack */
+          SET_STACK_SLOT(((address)pc - (intptr_t)(istate->method()->code_base()) + 3), 0);
+          MORE_STACK(1);
+          /* FALL THROUGH */
+      }
+
+      CASE(_goto):
+      {
+          int16_t offset = (int16_t)Bytes::get_Java_u2(pc + 1);
+          address branch_pc = pc;
+          UPDATE_PC(offset);
+          DO_BACKEDGE_CHECKS(offset, branch_pc);
+          CONTINUE;
+      }
+
+      CASE(_jsr_w): {
+          /* push return address on the stack */
+          SET_STACK_SLOT(((address)pc - (intptr_t)(istate->method()->code_base()) + 5), 0);
+          MORE_STACK(1);
+          /* FALL THROUGH */
+      }
+
+      CASE(_goto_w):
+      {
+          int32_t offset = Bytes::get_Java_u4(pc + 1);
+          address branch_pc = pc;
+          UPDATE_PC(offset);
+          DO_BACKEDGE_CHECKS(offset, branch_pc);
+          CONTINUE;
+      }
+
+      /* return from a jsr or jsr_w */
+
+      CASE(_ret): {
+          pc = istate->method()->code_base() + (intptr_t)(LOCALS_SLOT(pc[1]));
+          UPDATE_PC_AND_CONTINUE(0);
+      }
+
+      /* debugger breakpoint */
+
+      CASE(_breakpoint): {
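+          // Fetch the bytecode the breakpoint replaced, post the JVMTI
+          // breakpoint event via InterpreterRuntime::_breakpoint, then
+          // re-dispatch the original opcode.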
+          Bytecodes::Code original_bytecode;
+          DECACHE_STATE();                                        
+          SET_LAST_JAVA_FRAME();                                  
+          original_bytecode = InterpreterRuntime::get_original_bytecode_at(THREAD, 
+                              istate->method(), pc);
+          RESET_LAST_JAVA_FRAME();
+          CACHE_STATE();
+          if (THREAD->pending_exception()) goto handle_exception;
+          CALL_VM(InterpreterRuntime::_breakpoint(THREAD, istate->method(), pc),
+                  handle_exception);
+
+          opcode = (jubyte)original_bytecode;
+          goto opcode_switch;
+      }
+
+      DEFAULT:
+          fatal2("\t*** Unimplemented opcode: %d = %s\n",
+                 opcode, Bytecodes::name((Bytecodes::Code)opcode));
+          goto finish;
+
+      } /* switch(opc) */
+
+      
+#ifdef USELABELS
+    check_for_exception: 
+#endif
+    {
+      if (!THREAD->has_pending_exception()) {
+        CONTINUE;
+      }
+      /* We will be gcsafe soon, so flush our state. */
+      DECACHE_PC();
+      goto handle_exception;
+    }
+  do_continue: ;
+
+  } /* while (1) interpreter loop */
+
+
+  // An exception exists in the thread state; see whether this activation can handle it
+  handle_exception: {
+
+    HandleMarkCleaner __hmc(THREAD);
+    Handle except_oop(THREAD, THREAD->pending_exception());
+    // Prevent any subsequent HandleMarkCleaner in the VM 
+    // from freeing the except_oop handle.
+    HandleMark __hm(THREAD);
+
+    THREAD->clear_pending_exception();
+    assert(except_oop(), "No exception to process");
+    intptr_t continuation_bci;
+    // expression stack is emptied
+    topOfStack = istate->stack_base() - Interpreter::stackElementWords();
+    CALL_VM(continuation_bci = (intptr_t)InterpreterRuntime::exception_handler_for_exception(THREAD, except_oop()), 
+            handle_exception);
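+    // A non-negative continuation_bci is the handler's bci in this method; a
+    // negative value means no handler in this activation and we unwind below.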
+
+    except_oop = (oop) THREAD->vm_result();
+    THREAD->set_vm_result(NULL);
+    if (continuation_bci >= 0) {
+      // Place exception on top of stack
+      SET_STACK_OBJECT(except_oop(), 0);
+      MORE_STACK(1);
+      pc = istate->method()->code_base() + continuation_bci;
+      if (TraceExceptions) {
+        ttyLocker ttyl;
+        ResourceMark rm;
+        tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", except_oop->print_value_string(), except_oop());
+        tty->print_cr(" thrown in interpreter method <%s>", istate->method()->print_value_string());
+        tty->print_cr(" at bci %d, continuing at %d for thread " INTPTR_FORMAT,
+                      pc - (intptr_t)istate->method()->code_base(),
+                      continuation_bci, THREAD);
+      }
+      // for AbortVMOnException flag
+      NOT_PRODUCT(Exceptions::debug_check_abort(except_oop));
+      goto run;
+    }
+    if (TraceExceptions) {
+      ttyLocker ttyl;
+      ResourceMark rm;
+      tty->print_cr("Exception <%s> (" INTPTR_FORMAT ")", except_oop->print_value_string(), except_oop());
+      tty->print_cr(" thrown in interpreter method <%s>", istate->method()->print_value_string());
+      tty->print_cr(" at bci %d, unwinding for thread " INTPTR_FORMAT,
+                    pc  - (intptr_t) istate->method()->code_base(),
+                    THREAD);
+    }
+    // for AbortVMOnException flag
+    NOT_PRODUCT(Exceptions::debug_check_abort(except_oop));
+    // No handler in this activation, unwind and try again
+    THREAD->set_pending_exception(except_oop(), NULL, 0);
+    goto handle_return;
+  }  /* handle_exception: */
+      
+
+
+  // Return from an interpreter invocation with the result of the interpretation
+  // on the top of the Java Stack (or a pending exception)
+
+handle_Pop_Frame:
+
+  // We don't really do anything special here except that we must be aware
+  // that we can get here without ever locking the method (if sync).
+  // Also we skip the notification of the exit.
+
+  istate->set_msg(popping_frame);
+  // Clear pending so while the pop is in process
+  // we don't start another one if a call_vm is done.
+  THREAD->clr_pop_frame_pending();
+  // Let the interpreter (only) see that we're in the process of popping a frame
+  THREAD->set_pop_frame_in_process();
+
+handle_return:
+  {
+    DECACHE_STATE();
+
+    bool suppress_error = istate->msg() == popping_frame;
+    bool suppress_exit_event = THREAD->has_pending_exception() || suppress_error;
+    Handle original_exception(THREAD, THREAD->pending_exception());
+    Handle illegal_state_oop(THREAD, NULL);
+
+    // We'd like a HandleMark here to prevent any subsequent HandleMarkCleaner
+    // in any following VM entries from freeing our live handles, but illegal_state_oop
+    // isn't really allocated yet and so doesn't become live until later and
+    // in unpredictable places. Instead we must protect the places where we enter the
+    // VM. It would be much simpler (and safer) if we could allocate a real handle with
+    // a NULL oop in it and then overwrite the oop later as needed. This
+    // unfortunately isn't possible.
+
+    THREAD->clear_pending_exception();
+
+    //
+    // As far as we are concerned we have returned. If there is a pending exception,
+    // it will be returned as this invocation's result. However, if we get any
+    // exception(s) while checking the monitor state, one of those IllegalMonitorStateExceptions
+    // will be our final result (i.e. a monitor exception trumps a pending exception).
+    //
+
+    // If we never locked the method (or really passed the point where we would have),
+    // there is no need to unlock it (or look for other monitors), since that
+    // could not have happened.
+
+    if (!THREAD->do_not_unlock()) {
+      // At this point we consider that we have returned. We now check that the
+      // locks were properly block structured. If we find that they were not
+      // used properly we will return with an illegal monitor exception.
+      // The exception is checked by the caller not the callee since this
+      // checking is considered to be part of the invocation and therefore
+      // in the caller's scope (JVM spec 8.13).
+      //
+      // Another weird thing to watch for is if the method was locked
+      // recursively and then not exited properly. This means we must
+      // examine all the entries in reverse time (and stack) order and
+      // unlock as we find them. If we find the method monitor before
+      // we are at the initial entry then we should throw an exception.
+      // It is not clear that the template-based interpreter does this
+      // correctly.
+        
+      BasicObjectLock* base = istate->monitor_base();
+      BasicObjectLock* end = (BasicObjectLock*) istate->stack_base();
+      bool method_unlock_needed = istate->method()->is_synchronized();
+      // We know the initial monitor was used for the method, so don't check
+      // that slot in the loop.
+      if (method_unlock_needed) base--;
+
+      // Check all the monitors to see that they are unlocked. Install an exception if any is found to be locked.
+      while (end < base) {
+        oop lockee = end->obj();
+        if (lockee != NULL) {
+          BasicLock* lock = end->lock();
+          markOop header = lock->displaced_header();
+          end->set_obj(NULL);
+          // If it isn't recursive we must either swap the old header back or call the runtime
+          if (header != NULL) {
+            if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
+              // restore object for the slow case
+              end->set_obj(lockee);
+              {
+                // Prevent any HandleMarkCleaner from freeing our live handles
+                HandleMark __hm(THREAD); 
+                CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, end));
+              }
+            }
+          }
+          // One error is plenty
+          if (illegal_state_oop() == NULL && !suppress_error) {
+            {
+              // Prevent any HandleMarkCleaner from freeing our live handles
+              HandleMark __hm(THREAD); 
+              CALL_VM_NOCHECK(InterpreterRuntime::throw_illegal_monitor_state_exception(THREAD));
+            }
+            assert(THREAD->has_pending_exception(), "Lost our exception!");
+            illegal_state_oop = THREAD->pending_exception();
+            THREAD->clear_pending_exception();
+          }
+        }
+        end++;
+      }
+      // Unlock the method if needed
+      if (method_unlock_needed) {
+        if (base->obj() == NULL) {
+          // The method is already unlocked; this is not good.
+          if (illegal_state_oop() == NULL && !suppress_error) {
+            {
+              // Prevent any HandleMarkCleaner from freeing our live handles
+              HandleMark __hm(THREAD); 
+              CALL_VM_NOCHECK(InterpreterRuntime::throw_illegal_monitor_state_exception(THREAD));
+            }
+            assert(THREAD->has_pending_exception(), "Lost our exception!");
+            illegal_state_oop = THREAD->pending_exception();
+            THREAD->clear_pending_exception();
+          }
+        } else {
+          //
+          // The initial monitor is always used for the method.
+          // However, if that slot no longer holds the oop for the method, it was
+          // unlocked and then reused by something that wasn't unlocked!
+          //
+          // Deopt can come in with rcvr dead because C2 knows
+          // its value is preserved in the monitor. So we can't use locals[0] at all
+          // and must use the first monitor slot.
+          //
+          oop rcvr = base->obj();
+          if (rcvr == NULL) {
+            if (!suppress_error) {
+              VM_JAVA_ERROR_NO_JUMP(vmSymbols::java_lang_NullPointerException(), "");
+              illegal_state_oop = THREAD->pending_exception();
+              THREAD->clear_pending_exception();
+            }
+          } else {
+            BasicLock* lock = base->lock();
+            markOop header = lock->displaced_header();
+            base->set_obj(NULL);
+            // If it isn't recursive we must either swap the old header back or call the runtime
+            if (header != NULL) {
+              if (Atomic::cmpxchg_ptr(header, rcvr->mark_addr(), lock) != lock) {
+                // restore object for the slow case
+                base->set_obj(rcvr);
+                {
+                  // Prevent any HandleMarkCleaner from freeing our live handles
+                  HandleMark __hm(THREAD); 
+                  CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, base));
+                }
+                if (THREAD->has_pending_exception()) {
+                  if (!suppress_error) illegal_state_oop = THREAD->pending_exception();
+                  THREAD->clear_pending_exception();
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    //
+    // Notify jvmti/jvmdi/jvmpi
+    //
+    // NOTE: we do not notify a method_exit if we have a pending exception,
+    // including an exception we generate for unlocking checks.  In the former
+    // case, JVMDI has already been notified by our call for the exception handler
+    // and in both cases as far as JVMDI is concerned we have already returned.
+    // If we notify it again JVMDI will be all confused about how many frames
+    // are still on the stack (4340444).
+    //
+    // Further note that jvmpi does not suppress method_exit notifications
+    // in the case of exceptions (which makes more sense to me). See bug
+    // 4933156
+    //
+    // NOTE Further! It turns out that the JVMTI spec in fact expects to see
+    // method_exit events whenever we leave an activation unless it was done
+    // for popframe. This is just like jvmpi and nothing like jvmdi. However
+    // we are passing the tests at the moment (apparently because they are
+    // jvmdi based) so rather than change this code and possibly fail tests
+    // we will leave it alone (with this note) in anticipation of changing
+    // the vm and the tests simultaneously.
+
+
+    //
+    suppress_exit_event = suppress_exit_event || illegal_state_oop() != NULL;
+
+
+
+#ifdef VM_JVMTI
+      if (_jvmti_interp_events) {
+        // Whenever JVMTI puts a thread in interp_only_mode, method
+        // entry/exit events are sent for that thread to track stack depth.  
+        if ( !suppress_exit_event && THREAD->is_interp_only_mode() ) {
+          {
+            // Prevent any HandleMarkCleaner from freeing our live handles
+            HandleMark __hm(THREAD); 
+            CALL_VM_NOCHECK(InterpreterRuntime::post_method_exit(THREAD));
+          }
+        }
+      }
+#endif /* VM_JVMTI */
+
+    /* Only suppress method_exit events for jvmpi if we are doing a popFrame */
+    if ( istate->msg() != popping_frame && *jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_EXIT) == JVMPI_EVENT_ENABLED) {
+      {
+        // Prevent any HandleMarkCleaner from freeing our live handles
+        HandleMark __hm(THREAD); 
+        CALL_VM_NOCHECK(SharedRuntime::jvmpi_method_exit(THREAD, istate->method()))
+      }
+    }
+
+    //
+    // See if we are returning any exception
+    // A pending exception that was pending prior to a possible popping frame
+    // overrides the popping frame.
+    //
+    assert(!suppress_error || suppress_error && illegal_state_oop() == NULL, "Error was not suppressed");
+    if (illegal_state_oop() != NULL || original_exception() != NULL) {
+      // inform the frame manager we have no result
+      istate->set_msg(throwing_exception);
+      if (illegal_state_oop() != NULL) 
+        THREAD->set_pending_exception(illegal_state_oop(), NULL, 0);
+      else
+        THREAD->set_pending_exception(original_exception(), NULL, 0);
+      istate->set_return_kind((Bytecodes::Code)opcode);
+      UPDATE_PC_AND_RETURN(0);
+    }
+
+    if (istate->msg() == popping_frame) {
+      // Make it simpler on the assembly code and set the message for the frame pop.
+      // returns
+      if (istate->prev() == NULL) {
+        // We must be returning to a deoptimized frame (because popframe only happens between
+        // two interpreted frames). We need to save the current arguments in C heap so that
+        // the deoptimized frame when it restarts can copy the arguments to its expression
+        // stack and re-execute the call. We also have to notify deoptimization that this
+        // has occurred and to pick up the preserved args and copy them to the deoptimized
+        // frame's java expression stack. Yuck.
+        //
+#ifndef CORE
+        THREAD->popframe_preserve_args(in_ByteSize(istate->method()->size_of_parameters() * wordSize),
+                                LOCALS_SLOT(istate->method()->size_of_parameters() - 1));
+        THREAD->set_popframe_condition_bit(JavaThread::popframe_force_deopt_reexecution_bit);
+#else
+         assert(false, "must return to interpreted frame");
+#endif
+      }
+      UPDATE_PC_AND_RETURN(1);
+    } else {
+      // Normal return
+      // Advance the pc and return to frame manager
+      istate->set_msg(return_from_method);
+      istate->set_return_kind((Bytecodes::Code)opcode);
+      UPDATE_PC_AND_RETURN(1);
+    }
+  } /* handle_return: */
+
+// This is really a fatal error return
+
+finish:
+  DECACHE_TOS();
+  DECACHE_PC();
+
+  return;
+}
+
+#endif // CC_INTERP
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/interpreter/cInterpreter.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,552 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)cInterpreter.hpp	1.17 05/11/18 15:21:56 JVM"
+#endif
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+#ifdef CC_INTERP
+
+// CVM definitions find hotspot equivalents...
+
+union VMJavaVal64 {
+    jlong   l;
+    jdouble d;
+    uint32_t      v[2];
+};
+
+
+typedef class cInterpreter* interpreterState;
+
+struct call_message {
+    methodOop _callee;               /* method to call during call_method request */
+    address   _callee_entry_point;   /* address to jump to for call_method request */
+    int       _bcp_advance;          /* size of the invoke bytecode operation */
+};
+
+struct osr_message {
+    address _osr_buf;                 /* the osr buffer */
+    address _osr_entry;               /* the entry to the osr method */
+};
+
+// Result returned to frame manager
+union frame_manager_message {
+    call_message _to_call;            /* describes callee */
+    Bytecodes::Code _return_kind;     /* i_return, a_return, ... */
+    osr_message _osr;                 /* describes the osr */
+};
+
+class cInterpreter : StackObj {
+friend class AbstractInterpreterGenerator;
+friend class InterpreterGenerator;
+friend class InterpreterMacroAssembler;
+friend class frame;
+friend class VMStructs;
+
+public:
+    enum messages {
+         no_request = 0,            // unused
+         initialize,                // Perform one time interpreter initializations (assumes all switches set)
+         // status message to C++ interpreter
+         method_entry,              // initial method entry to interpreter
+         method_resume,             // frame manager response to return_from_method request (assuming a frame to resume)
+         deopt_resume,              // returning from a native call into a deopted frame
+         deopt_resume2,             // deopt resume as a result of a PopFrame
+         got_monitors,              // frame manager response to more_monitors request
+         rethrow_exception,         // unwinding and throwing exception
+         // requests to frame manager from C++ interpreter
+         call_method,               // request for new frame from interpreter, manager responds with method_entry
+         do_osr,                    // osr the current method
+         return_from_method,        // request from interpreter to unwind, manager responds with method_continue
+         more_monitors,             // need a new monitor
+	 throwing_exception,        // unwind stack and rethrow
+	 popping_frame              // unwind call and retry call
+    };
+
+private:
+    JavaThread*           _thread;        // the vm's java thread pointer
+    address               _bcp;           // instruction pointer
+    intptr_t*             _locals;        // local variable pointer
+    constantPoolCacheOop  _constants;     // constant pool cache
+    methodOop             _method;        // method being executed
+#ifndef CORE
+    DataLayout*           _mdx;           // compiler profiling data for current bytecode
+#endif
+    intptr_t*             _stack;         // expression stack
+    messages              _msg;           // frame manager <-> interpreter message
+    frame_manager_message _result;        // result to frame manager
+    interpreterState      _prev_link;     // previous interpreter state
+    oop                   _oop_temp;      // mirror for interpreted native, null otherwise
+    // These are likely platform dependent fields
+    // jint*  sender_sp;                  // previous stack pointer
+    intptr_t*             _stack_base;    // base of expression stack
+    intptr_t*             _stack_limit;   // limit of expression stack
+    BasicObjectLock*      _monitor_base;  // base of monitors on the native stack
+
+
+public:
+  // Constructor is only used by the initialization step. All other instances are created
+  // by the frame manager.
+  cInterpreter(messages msg);
+
+#ifndef CORE
+//
+// Deoptimization support
+//
+static void layout_interpreterState(interpreterState to_fill,
+				    frame* caller,
+				    frame* interpreter_frame,
+				    methodOop method,
+				    intptr_t* locals,
+				    intptr_t* stack,
+				    intptr_t* stack_base,
+				    intptr_t* monitor_base,
+				    intptr_t* frame_bottom,
+				    bool top_frame);
+#endif
+
+/*
+ * Generic 32-bit wide "Java slot" definition. This type occurs
+ * in operand stacks, Java locals, object fields, constant pools.
+ */
+union VMJavaVal32 {
+    jint     i;
+    jfloat   f;
+    oop      r;
+    uint32_t raw;
+};
+
+/*
+ * Generic 64-bit Java value definition
+ */
+union VMJavaVal64 {
+    jlong   l;
+    jdouble d;
+    uint32_t      v[2];
+};
+
+/*
+ * Generic 32-bit wide "Java slot" definition. This type occurs
+ * in Java locals, object fields, constant pools, and
+ * operand stacks (as a CVMStackVal32).
+ */
+typedef union VMSlotVal32 {
+    VMJavaVal32    j;     /* For "Java" values */
+    address        a;     /* a return created by jsr or jsr_w */
+} VMSlotVal32;
+
+
+/*
+ * Generic 32-bit wide stack slot definition.
+ */
+union VMStackVal32 {
+    VMJavaVal32    j;     /* For "Java" values */
+    VMSlotVal32    s;     /* any value from a "slot" or locals[] */
+};
+
+inline JavaThread* thread() { return _thread; }
+
+inline address bcp() { return _bcp; }
+inline void set_bcp(address new_bcp) { _bcp = new_bcp; }
+
+inline intptr_t* locals() { return _locals; }
+
+inline constantPoolCacheOop constants() { return _constants; }
+inline methodOop method() { return _method; }
+#ifndef CORE
+inline DataLayout* mdx() { return _mdx; }
+#endif
+
+inline messages msg() { return _msg; }
+inline void set_msg(messages new_msg) { _msg = new_msg; }
+
+inline methodOop callee() { return _result._to_call._callee; }
+inline void set_callee(methodOop new_callee) { _result._to_call._callee = new_callee; }
+inline void set_callee_entry_point(address entry) { _result._to_call._callee_entry_point = entry; }
+inline void set_osr_buf(address buf) { _result._osr._osr_buf = buf; }
+inline void set_osr_entry(address entry) { _result._osr._osr_entry = entry; }
+inline int bcp_advance() { return _result._to_call._bcp_advance; }
+inline void set_bcp_advance(int count) { _result._to_call._bcp_advance = count; }
+
+inline void set_return_kind(Bytecodes::Code kind) { _result._return_kind = kind; }
+
+inline interpreterState prev() { return _prev_link; }
+
+inline intptr_t* stack() { return _stack; }
+inline void set_stack(intptr_t* new_stack) { _stack = new_stack; }
+
+
+inline intptr_t* stack_base() { return _stack_base; }
+inline intptr_t* stack_limit() { return _stack_limit; }
+
+inline BasicObjectLock* monitor_base() { return _monitor_base; }
+
+/*
+ * 64-bit Arithmetic:
+ *
+ * The functions below follow the semantics of the
+ * ladd, land, ldiv, lmul, lor, lxor, and lrem bytecodes,
+ * respectively.
+ */
+
+static jlong VMlongAdd(jlong op1, jlong op2);
+static jlong VMlongAnd(jlong op1, jlong op2);
+static jlong VMlongDiv(jlong op1, jlong op2);
+static jlong VMlongMul(jlong op1, jlong op2);
+static jlong VMlongOr (jlong op1, jlong op2);
+static jlong VMlongSub(jlong op1, jlong op2);
+static jlong VMlongXor(jlong op1, jlong op2);
+static jlong VMlongRem(jlong op1, jlong op2);
+
+/*
+ * Shift:
+ *
+ * The functions below follow the semantics of the
+ * lushr, lshl, and lshr bytecodes, respectively.
+ */
+
+static jlong VMlongUshr(jlong op1, jint op2);
+static jlong VMlongShl (jlong op1, jint op2);
+static jlong VMlongShr (jlong op1, jint op2);
+
+/*
+ * Unary:
+ *
+ * Return the negation of "op" (-op), according to
+ * the semantics of the lneg bytecode.
+ */
+
+static jlong VMlongNeg(jlong op);
+
+/*
+ * Return the complement of "op" (~op)
+ */
+
+static jlong VMlongNot(jlong op);
+
+
+/*
+ * Comparisons to 0:
+ */
+
+static int32_t VMlongLtz(jlong op);     /* op <= 0 */
+static int32_t VMlongGez(jlong op);     /* op >= 0 */
+static int32_t VMlongEqz(jlong op);     /* op == 0 */
+
+/*
+ * Between operands:
+ */
+
+static int32_t VMlongEq(jlong op1, jlong op2);    /* op1 == op2 */
+static int32_t VMlongNe(jlong op1, jlong op2);    /* op1 != op2 */
+static int32_t VMlongGe(jlong op1, jlong op2);    /* op1 >= op2 */
+static int32_t VMlongLe(jlong op1, jlong op2);    /* op1 <= op2 */
+static int32_t VMlongLt(jlong op1, jlong op2);    /* op1 <  op2 */
+static int32_t VMlongGt(jlong op1, jlong op2);    /* op1 >  op2 */
+
+/*
+ * Comparisons (returning a jint value: 0, 1, or -1)
+ *
+ * Between operands:
+ *
+ * Compare "op1" and "op2" according to the semantics of the
+ * "lcmp" bytecode.
+ */
+
+static int32_t VMlongCompare(jlong op1, jlong op2);
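+/*
+ * Illustrative sketch only (the real definition lives in the platform-dependent
+ * inline file, not in this header): lcmp semantics amount to
+ *
+ *   static int32_t VMlongCompare(jlong op1, jlong op2) {
+ *     return (op1 < op2) ? -1 : ((op1 == op2) ? 0 : 1);
+ *   }
+ */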
+
+/*
+ * Convert int to long, according to "i2l" bytecode semantics
+ */
+static jlong VMint2Long(jint val);
+
+/*
+ * Convert long to int, according to "l2i" bytecode semantics
+ */
+static jint VMlong2Int(jlong val);
+
+/*
+ * Convert long to float, according to "l2f" bytecode semantics
+ */
+static jfloat VMlong2Float(jlong val);
+
+/*
+ * Convert long to double, according to "l2d" bytecode semantics
+ */
+static jdouble VMlong2Double(jlong val);
+
+/*
+ * Java floating-point float value manipulation.
+ *
+ * The result argument is, once again, an lvalue.
+ *
+ * Arithmetic:
+ *
+ * The functions below follow the semantics of the
+ * fadd, fsub, fmul, fdiv, and frem bytecodes,
+ * respectively.
+ */
+
+static jfloat VMfloatAdd(jfloat op1, jfloat op2);
+static jfloat VMfloatSub(jfloat op1, jfloat op2);
+static jfloat VMfloatMul(jfloat op1, jfloat op2);
+static jfloat VMfloatDiv(jfloat op1, jfloat op2);
+static jfloat VMfloatRem(jfloat op1, jfloat op2);
+
+/*
+ * Unary:
+ *
+ * Return the negation of "op" (-op), according to
+ * the semantics of the fneg bytecode.
+ */
+
+static jfloat VMfloatNeg(jfloat op);
+
+/*
+ * Comparisons (returning an int value: 0, 1, or -1)
+ *
+ * Between operands:
+ *
+ * Compare "op1" and "op2" according to the semantics of the
+ * "fcmpl" (direction is -1) or "fcmpg" (direction is 1) bytecodes.
+ */
+
+static int32_t VMfloatCompare(jfloat op1, jfloat op2,
+                              int32_t direction);
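+/*
+ * Illustrative sketch only (assumed, not the platform definition): with
+ * direction = -1 for "fcmpl" and +1 for "fcmpg", a conforming implementation is
+ *
+ *   static int32_t VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) {
+ *     if (op1 <  op2) return -1;
+ *     if (op1 >  op2) return  1;
+ *     if (op1 == op2) return  0;
+ *     return direction;              // at least one operand is NaN
+ *   }
+ */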
+/*
+ * Conversion:
+ */
+
+/*
+ * Convert float to double, according to "f2d" bytecode semantics
+ */
+
+static jdouble VMfloat2Double(jfloat op);
+
+/*
+ ******************************************
+ * Java double floating-point manipulation.
+ ******************************************
+ *
+ * The result argument is, once again, an lvalue.
+ *
+ * Conversions:
+ */
+
+/*
+ * Convert double to int, according to "d2i" bytecode semantics
+ */
+
+static jint VMdouble2Int(jdouble val);
+
+/*
+ * Convert double to float, according to "d2f" bytecode semantics
+ */
+
+static jfloat VMdouble2Float(jdouble val);
+
+/*
+ * Convert int to double, according to "i2d" bytecode semantics
+ */
+
+static jdouble VMint2Double(jint val);
+
+/*
+ * Arithmetic:
+ *
+ * The functions below follow the semantics of the
+ * dadd, dsub, ddiv, dmul, and drem bytecodes, respectively.
+ */
+
+static jdouble VMdoubleAdd(jdouble op1, jdouble op2);
+static jdouble VMdoubleSub(jdouble op1, jdouble op2);
+static jdouble VMdoubleDiv(jdouble op1, jdouble op2);
+static jdouble VMdoubleMul(jdouble op1, jdouble op2);
+static jdouble VMdoubleRem(jdouble op1, jdouble op2);
+
+/*
+ * Unary:
+ *
+ * Return the negation of "op" (-op), according to
+ * the semantics of the dneg bytecode.
+ */
+
+static jdouble VMdoubleNeg(jdouble op);
+
+/*
+ * Comparisons (returning an int32_t value: 0, 1, or -1)
+ *
+ * Between operands:
+ *
+ * Compare "op1" and "op2" according to the semantics of the
+ * "dcmpl" (direction is -1) or "dcmpg" (direction is 1) bytecodes.
+ */
+
+static int32_t VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction);
+
+/*
+ * Copy two typeless 32-bit words from one location to another.
+ * This is semantically equivalent to:
+ * 
+ * to[0] = from[0];
+ * to[1] = from[1];
+ *
+ * but this interface is provided for those platforms that could
+ * optimize this into a single 64-bit transfer.
+ */
+
+static void VMmemCopy64(uint32_t to[2], const uint32_t from[2]);
+
+
+// Arithmetic operations
+
+/*
+ * Java arithmetic methods. 
+ * The functions below follow the semantics of the
+ * iadd, isub, imul, idiv, irem, iand, ior, ixor,
+ * and ineg bytecodes, respectively.
+ */
+
+static jint VMintAdd(jint op1, jint op2);
+static jint VMintSub(jint op1, jint op2);
+static jint VMintMul(jint op1, jint op2);
+static jint VMintDiv(jint op1, jint op2);
+static jint VMintRem(jint op1, jint op2);
+static jint VMintAnd(jint op1, jint op2);
+static jint VMintOr (jint op1, jint op2);
+static jint VMintXor(jint op1, jint op2);
+
+/*
+ * Shift Operation:
+ * The functions below follow the semantics of the
+ * iushr, ishl, and ishr bytecodes, respectively.
+ */
+
+static jint VMintUshr(jint op, jint num);
+static jint VMintShl (jint op, jint num);
+static jint VMintShr (jint op, jint num);
+
+/*
+ * Unary Operation:
+ *
+ * Return the negation of "op" (-op), according to
+ * the semantics of the ineg bytecode.
+ */
+
+static jint VMintNeg(jint op);
+
+/*
+ * Int Conversions:
+ */
+
+/*
+ * Convert int to float, according to "i2f" bytecode semantics
+ */
+
+static jfloat VMint2Float(jint val);
+
+/*
+ * Convert int to byte, according to "i2b" bytecode semantics
+ */
+
+static jbyte VMint2Byte(jint val);
+
+/*
+ * Convert int to char, according to "i2c" bytecode semantics
+ */
+
+static jchar VMint2Char(jint val);
+
+/*
+ * Convert int to short, according to "i2s" bytecode semantics
+ */
+
+static jshort VMint2Short(jint val);
+
+/*=========================================================================
+ * Bytecode interpreter operations
+ *=======================================================================*/
+
+static void dup(intptr_t *tos);
+static void dup2(intptr_t *tos);
+static void dup_x1(intptr_t *tos);    /* insert top word two down */
+static void dup_x2(intptr_t *tos);    /* insert top word three down  */
+static void dup2_x1(intptr_t *tos);   /* insert top 2 slots three down */
+static void dup2_x2(intptr_t *tos);   /* insert top 2 slots four down */
+static void swap(intptr_t *tos);      /* swap top two elements */
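+/*
+ * Worked example (illustrative only), rightmost value on top of the stack:
+ *   ...,A,B    dup     ->  ...,A,B,B
+ *   ...,A,B    swap    ->  ...,B,A
+ *   ...,A,B    dup_x1  ->  ...,B,A,B
+ *   ...,C,A,B  dup_x2  ->  ...,B,C,A,B
+ */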
+
+// umm, don't like that this method modifies its object
+
+// The Interpreter used when 
+static void InterpretMethod(interpreterState istate);
+// The interpreter used if JVMPI is enabled or JVMTI needs interpreter events
+static void InterpretMethodWithChecks(interpreterState istate);
+static void End_Of_Interpreter(void);
+
+// Inline static functions for Java Stack and Local manipulation
+
+static address stack_slot(intptr_t *tos, int offset);
+static jint stack_int(intptr_t *tos, int offset);
+static jfloat stack_float(intptr_t *tos, int offset);
+static oop stack_object(intptr_t *tos, int offset);
+static jdouble stack_double(intptr_t *tos, int offset);
+static jlong stack_long(intptr_t *tos, int offset);
+
+static void tag_stack(intptr_t *tos, frame::Tag tag, int offset);
+
+// only used for value types
+static void set_stack_slot(intptr_t *tos, address value, int offset);
+static void set_stack_int(intptr_t *tos, int value, int offset);
+static void set_stack_float(intptr_t *tos, jfloat value, int offset);
+static void set_stack_object(intptr_t *tos, oop value, int offset);
+
+// needs to be platform dep for the 32 bit platforms.
+static void set_stack_double(intptr_t *tos, jdouble value, int offset);
+static void set_stack_long(intptr_t *tos, jlong value, int offset);
+
+static void set_stack_double_from_addr(intptr_t *tos, address addr, int offset);
+static void set_stack_long_from_addr(intptr_t *tos, address addr, int offset);
+
+// Locals
+
+static address locals_slot(intptr_t* locals, int offset);
+static jint locals_int(intptr_t* locals, int offset);
+static jfloat locals_float(intptr_t* locals, int offset);
+static oop locals_object(intptr_t* locals, int offset);
+static jdouble locals_double(intptr_t* locals, int offset);
+static jlong locals_long(intptr_t* locals, int offset);
+
+static address locals_long_at(intptr_t* locals, int offset);
+static address locals_double_at(intptr_t* locals, int offset);
+
+static void tag_locals(intptr_t *locals, frame::Tag tag, int offset);
+
+static void set_locals_slot(intptr_t *locals, address value, int offset);
+static void set_locals_int(intptr_t *locals, jint value, int offset);
+static void set_locals_float(intptr_t *locals, jfloat value, int offset);
+static void set_locals_object(intptr_t *locals, oop value, int offset);
+static void set_locals_double(intptr_t *locals, jdouble value, int offset);
+static void set_locals_long(intptr_t *locals, jlong value, int offset);
+static void set_locals_double_from_addr(intptr_t *locals,
+                                   address addr, int offset);
+static void set_locals_long_from_addr(intptr_t *locals,
+                                   address addr, int offset);
+
+static void astore(intptr_t* topOfStack, int stack_offset,
+                   intptr_t* locals,     int locals_offset);
+
+// Support for dup and swap
+static void copy_stack_slot(intptr_t *tos, int from_offset, int to_offset);
+
+#ifndef PRODUCT
+static void verify_locals_tag(intptr_t *locals, frame::Tag tag, int offset);
+static void verify_stack_tag(intptr_t *tos, frame::Tag tag, int offset);
+#endif // PRODUCT
+
+    // Platform fields/methods 
+# include "incls/_cInterpreter_pd.hpp.incl"
+
+}; // cInterpreter
+
+#endif // CC_INTERP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/interpreter/cInterpreter.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,28 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)cInterpreter.inline.hpp	1.5 05/11/18 15:21:57 JVM"
+#endif
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+// This file holds platform-independent bodies of inline functions for the C++ based interpreter
+
+#ifdef CC_INTERP
+
+#ifdef ASSERT
+extern "C" { typedef void (*verify_oop_fn_t)(oop, const char *);};
+#define VERIFY_OOP(o) \
+	/*{ verify_oop_fn_t verify_oop_entry = \
+            *StubRoutines::verify_oop_subroutine_entry_address(); \
+          if (verify_oop_entry) { \
+	     (*verify_oop_entry)((o), "Not an oop!"); \
+	  } \
+	}*/
+#else
+#define VERIFY_OOP(o)
+#endif
+
+// Platform dependent data manipulation
+# include "incls/_cInterpreter_pd.inline.hpp.incl"
+#endif // CC_INTERP
--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -238,8 +239,19 @@
 
 IRT_ENTRY(void, InterpreterRuntime::create_exception(JavaThread* thread, char* name, char* message))
   // lookup exception klass
+#if 0
+#ifdef LOONGSONDEBUG
+	printf("create exeption %s :%s\n", name , message);
+#endif
+#endif
   symbolHandle s = oopFactory::new_symbol_handle(name, CHECK);
   if (ProfileTraps) {
+#if 0
+#ifdef LOONGSONDEBUG
+	printf("profile traps\n");
+#endif
+#endif
+	
     if (s == vmSymbols::java_lang_ArithmeticException()) {
       note_trap(thread, Deoptimization::Reason_div0_check, CHECK);
     } else if (s == vmSymbols::java_lang_NullPointerException()) {
@@ -316,6 +328,9 @@
   // is set, we don't want to trigger any classloading which may make calls
   // into java, or surprisingly find a matching exception handler for bci 0
   // since at this moment the method hasn't been "officially" entered yet.
+#if 0
+	printf("exception handler for exception\n");
+#endif
   if (thread->do_not_unlock_if_synchronized()) {
     ResourceMark rm;
     assert(current_bci == 0,  "bci isn't zero for do_not_unlock_if_synchronized");
@@ -1093,6 +1108,7 @@
     } else {
       CHECK_UNHANDLED_OOPS_ONLY(Thread::current()->clear_unhandled_oops());
     }
+
     if (handler_index < 0) {
       // use generic signature handler
       method->set_signature_handler(Interpreter::slow_signature_handler());
@@ -1130,7 +1146,8 @@
   // preparing the same method will be sure to see non-null entry & mirror.
 IRT_END
 
-#if defined(IA32) || defined(AMD64)
+// FIXME: does MIPS need this?
+#if defined(IA32) || defined(AMD64) || defined(MIPS32)
 IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address))
   if (src_address == dest_address) {
     return;
--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -108,7 +109,8 @@
                                         methodOopDesc* method,
                                         intptr_t* from, intptr_t* to);
 
-#if defined(IA32) || defined(AMD64)
+// FIXME: does MIPS need this?
+#if defined(IA32) || defined(AMD64) || defined(MIPS32)
   // Popframe support (only needed on x86 and AMD64)
   static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address);
 #endif
--- a/hotspot/src/share/vm/interpreter/linkResolver.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/interpreter/linkResolver.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -942,7 +942,23 @@
   method_name      = symbolHandle(THREAD, pool->name_ref_at(index));
   method_signature = symbolHandle(THREAD, pool->signature_ref_at(index));
   current_klass    = KlassHandle(THREAD, pool->pool_holder());
-}
+
+
+/*  tty->print("%s %d\n", __FILE__, __LINE__);
+  tty->print("resolved class: ");
+  resolved_klass->name()->print_symbol_on(tty);
+  tty->print_cr("");
+  tty->print("current class: ");
+  current_klass->name()->print_symbol_on(tty);
+  tty->print_cr("");
+  method_name->print_symbol_on(tty);
+  method_signature->print_symbol_on(tty);
+  tty->print_cr("");*/
+ /* if(768 == index) {
+    ResourceMark rm(THREAD);
+    pool->print_cpool();
+  }*/
+}
 
 
 void LinkResolver::resolve_invokestatic(CallInfo& result, constantPoolHandle pool, int index, TRAPS) {
--- a/hotspot/src/share/vm/interpreter/oopMapCache.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/interpreter/oopMapCache.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -225,7 +225,7 @@
   // $$$ This used to happen only for m/s collections; we might want to
   // think of an appropriate generalization of this distinction.
   guarantee(Universe::heap()->is_gc_active() ||
-            _method->is_oop_or_null(), "invalid oop in oopMapCache")
+            _method->is_oop_or_null(), "invalid oop in oopMapCache");
 }
 
 #ifdef ENABLE_ZAP_DEAD_LOCALS
--- a/hotspot/src/share/vm/memory/defNewGeneration.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/memory/defNewGeneration.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -505,6 +505,7 @@
                                bool   clear_all_soft_refs,
                                size_t size,
                                bool   is_tlab) {
+  //tty->print_cr("%s %x", __func__, __builtin_return_address(0));
   assert(full || size > 0, "otherwise we don't want to collect");
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   _next_gen = gch->next_gen(this);
@@ -901,6 +902,7 @@
 }
 
 void DefNewGeneration::verify(bool allow_dirty) {
+  print_on(tty);
   eden()->verify(allow_dirty);
   from()->verify(allow_dirty);
     to()->verify(allow_dirty);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/memory/referenceProcessorMT.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,187 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)referenceProcessorMT.hpp	1.9 06/06/01 00:49:09 JVM"
+#endif
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+// ReferenceProcessorMT class is a subclass of ReferenceProcessor,
+// providing multi-threaded reference processing capabilities.
+// It is currently only used by the CMS collector. The plan is
+// to allow ParNewGC to also use this in the near future.
+// For ParallelGC, a parallel (sibling) class will be needed.
+//
+// The basic idea here is that each of several threads may
+// both discover references in parallel and process
+// the discovered references. The parallel discovery capability
+// already exists in the basic ReferenceProcessor class currently;
+// this class augments that with parallel processing and enqueueing
+// capabilities by (essentially) providing alternate implementations
+// of the following virtual methods:
+// 
+//
+
+// fwd decl
+class AbstractRefProcTask;
+
+class ReferenceProcessorMT: public ReferenceProcessor {
+  bool  _processing_is_mt; // true during phases when
+                           // reference processing is MT.
+  int   _next_id;          // round-robin counter in
+                           // support of work distribution
+
+ protected:
+  friend class ReferenceProcessorInitializer;
+  friend class ReferenceProcessorSerial;
+  friend class ReferenceProcessorParallel;
+
+  AbstractRefProcTask*       _par_task;
+
+ public:
+  // Override
+  virtual void process_discovered_reflist(oop* refs_list_addr,
+                                  ReferencePolicy *policy,
+                                  bool clear_referent);
+
+  // Override XXX this may be avoidable? FIX ME !!!
+  virtual void process_phaseJNI();
+
+  // "Preclean" the given discovered reference list
+  // by removing references with strongly reachable referents.
+  // Currently used in support of CMS only.
+  void preclean_discovered_reflist(oop* refs_list_addr,
+                                   BoolObjectClosure* is_alive,
+                                   YieldClosure*      yield);
+
+  virtual void enqueue_discovered_reflist(oop refs_list,
+                                  oop* pending_list_addr);
+ protected:
+  // Override with MT implementation
+  virtual oop* get_discovered_list(ReferenceType rt);
+  virtual void add_to_discovered_list_mt(oop* list, oop obj,
+                                         oop* discovered_addr);
+
+ private:
+  virtual void enqueue_discovered_reflists(oop* pending_list_addr);
+
+  int  next_id() {
+    int id = _next_id;
+    if (++_next_id == _num_q) {
+      _next_id = 0;
+    }
+    return id;
+  }
+
+ public:
+  // constructor
+  ReferenceProcessorMT():
+    ReferenceProcessor(), 
+    _processing_is_mt(false),
+    _par_task(NULL),
+    _next_id(0)
+  {}
+
+  ReferenceProcessorMT(MemRegion span, bool atomic_discovery,
+                       bool mt_discovery, int mt_degree);
+
+  // Whether we are in a phase when _processing_ is MT.
+  bool processing_is_mt() const { return _processing_is_mt; }
+  void set_mt_processing(bool mt) { _processing_is_mt = mt; }
+
+ public:
+  // "Preclean" all the discovered reference lists
+  // by removing references with strongly reachable referents.
+  // The first argument is a predicate on an oop that indicates
+  // its (strong) reachability and the second is a closure that
+  // may be used to incrementalize or abort the precleaning process.
+  // The caller is responsible for taking care of potential
+  // interference with concurrent operations on these lists
+  // (or predicates involved) by other threads. Currently
+  // only used by the CMS collector.
+  void preclean_discovered_references(BoolObjectClosure* is_alive,
+                                      YieldClosure*      yield);
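+  // Hypothetical usage sketch (the closure subclass names are invented; only
+  // the BoolObjectClosure / YieldClosure hooks are assumed):
+  //
+  //   SomeIsAliveClosure is_alive;   // answers "is this referent strongly reachable?"
+  //   SomeYieldClosure   yield;      // lets precleaning be incrementalized or aborted
+  //   rp_mt->preclean_discovered_references(&is_alive, &yield);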
+};
+
+// A utility class used to "seed" the given ReferenceProcessorMT
+// instance with appropriate "input" values, and used subsequently
+// for reference processing via a call to
+// ReferenceProcessor::process_discovered_references().
+class ReferenceProcessorParallel: public ReferenceProcessorInitializer {
+ protected:
+  virtual void is_clean() const {
+    ReferenceProcessorInitializer::is_clean();
+    assert(((ReferenceProcessorMT*)_rp)->_par_task == NULL, "dirty decks");
+  }
+ public:
+  ReferenceProcessorParallel(ReferenceProcessorMT*  rp, ReferencePolicy* policy,
+                             AbstractRefProcTask* par_task):
+    ReferenceProcessorInitializer(rp) {
+    assert(rp->processing_is_mt(), "Use ReferenceProcessorSerial");
+    rp->_par_task = par_task;
+    rp->_policy   = policy;
+  }
+  ~ReferenceProcessorParallel() {
+    ((ReferenceProcessorMT*)_rp)->_par_task = NULL;
+    _rp->_policy = NULL;
+  }
+};
+
+// A utility class to temporarily change the MT processing
+// disposition of the given ReferenceProcessorMT instance
+// in the scope that contains it.
+class ReferenceProcessorMTProcMutator: StackObj {
+ private:
+  ReferenceProcessorMT* _rp;
+  bool  _saved_mt;
+
+ public:
+  ReferenceProcessorMTProcMutator(ReferenceProcessorMT* rp,
+                                 bool  mt):
+    _rp(rp) {
+    _saved_mt = _rp->processing_is_mt();
+    _rp->set_mt_processing(mt);
+  }
+
+  ~ReferenceProcessorMTProcMutator() {
+    _rp->set_mt_processing(_saved_mt);
+  }
+};
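+// Hypothetical usage sketch (variable names invented): because the mutator is a
+// StackObj, the MT-processing disposition is changed only for the enclosing scope:
+//
+//   {
+//     ReferenceProcessorMTProcMutator mt_on(rp_mt, true);
+//     // ... reference processing that may run multi-threaded ...
+//   }  // destructor restores the previous setting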
+
+////////////////////////////////////////////////////
+// Parallel Reference Processing Task
+////////////////////////////////////////////////////
+class AbstractRefProcTask: public AbstractGangTask {
+ public:
+  enum RefProcPhase {
+    Phase1,
+    Phase2,
+    Phase3,
+    PhaseJNI
+  };
+
+ protected:
+  ReferenceProcessorMT*  _rp;
+  int                  _n_workers;
+  WorkGang*            _workers;
+  ReferencePolicy*     _policy;
+  oop*                 _ref_list;
+  bool                 _clear_ref;
+  RefProcPhase         _phase;
+
+ public:
+  AbstractRefProcTask(const char* name,
+                      ReferenceProcessorMT* rp,
+                      int                 n_workers,
+                      WorkGang*           workers):
+    AbstractGangTask(name),
+    _rp(rp), _n_workers(n_workers), _workers(workers) {
+    assert(_rp->num_q() == _n_workers, "worker/queue mismatch");
+    assert(_n_workers == 1 || _workers != NULL, "no workers?");
+  }
+  virtual void reset() = 0;
+  void set_phase(RefProcPhase phase)  { _phase = phase; }
+  void set_policy(ReferencePolicy* p) { _policy = p; }
+  void set_ref_list(oop* list)        { _ref_list = list; }
+  void set_clear_ref(bool clear)      { _clear_ref = clear; }
+};
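+// Hypothetical sketch (not part of this change): a concrete task derives from
+// AbstractRefProcTask, implements the gang task's work(int) entry point (assumed
+// signature), and dispatches on the phase/policy/list set by the reference
+// processor, e.g.
+//
+//   class RefProcTaskSketch: public AbstractRefProcTask {
+//    public:
+//     RefProcTaskSketch(ReferenceProcessorMT* rp, int n, WorkGang* w):
+//       AbstractRefProcTask("sketch", rp, n, w) {}
+//     virtual void work(int i) {
+//       switch (_phase) {
+//         case Phase1:   /* apply _policy to the discovered list           */ break;
+//         case Phase2:   /* drop entries with strongly reachable referents */ break;
+//         case Phase3:   /* clear or keep referents, enqueue via _ref_list */ break;
+//         case PhaseJNI: /* handle JNI weak references                     */ break;
+//       }
+//     }
+//     virtual void reset() { /* re-seed per-cycle state */ }
+//   };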
--- a/hotspot/src/share/vm/oops/constantPoolOop.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/oops/constantPoolOop.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1117,6 +1117,113 @@
   return size;
 } /* end hash_utf8_entries_to */
 
+#ifndef PRODUCT
+void constantPoolOopDesc::print_cpool() {
+  u2   idx1, idx2;
+  jint size  = 0;
+  jint cnt   = length();
+
+  
+  tty->print("=======");
+  tty->print(" constant pool %d entries==============\n", cnt);
+  for (jint idx = 1; idx < cnt; idx++) {
+    u1   tag      = tag_at(idx).value();
+    jint ent_size = cpool_entry_size(idx);
+
+
+    tty->print("#%03hd tag=%03hd, ", idx, tag);
+    switch(tag) {
+      case JVM_CONSTANT_Invalid: {
+        tty->print("JVM_CONSTANT_Invalid");
+        break;
+      }
+      case JVM_CONSTANT_Unicode: {
+        assert(false, "Wrong constant pool tag: JVM_CONSTANT_Unicode");
+        tty->print("Unicode      %s", WARN_MSG);
+        break;
+      }
+      case JVM_CONSTANT_Utf8: {
+        symbolOop sym = symbol_at(idx);
+        char*     str = sym->as_utf8();
+        // Warning! It's crashing on x86 with len = sym->utf8_length()
+        tty->print("JVM_CONSTANT_Utf8: %s ", str);
+        break;
+      }
+      case JVM_CONSTANT_Integer: {
+        jint val = int_at(idx);
+        tty->print("int %d ", val);	
+        break;
+      }
+      case JVM_CONSTANT_Float: {
+        jfloat val = float_at(idx);
+        tty->print("float %f ", val);	
+        break;
+      }
+      case JVM_CONSTANT_Long: {
+        jlong val = long_at(idx);
+        tty->print("long " INT64_FORMAT " ", val);
+        idx++;             // Long takes two cpool slots
+        break;
+      }
+      case JVM_CONSTANT_Double: {
+        jdouble val = double_at(idx);
+        tty->print("double %f ", val);	
+        idx++;             // Double takes two cpool slots
+        break;
+      }
+      case JVM_CONSTANT_Class:
+      case JVM_CONSTANT_UnresolvedClass:
+      case JVM_CONSTANT_UnresolvedClassInError: {
+        symbolOop sym = klass_name_at(idx);
+        tty->print("JVM_CONSTANT_Class: idx=#%03hd, %s", idx, sym->as_utf8());
+        break;
+      }
+      case JVM_CONSTANT_String: {
+        unsigned int hash;
+        char *str = string_at_noresolve(idx);
+        symbolOop sym = SymbolTable::lookup_only(str, (int) strlen(str), hash);
+        tty->print("JVM_CONSTANT_String: idx=#%03hd, %s", idx, str);
+        break;
+      }
+      case JVM_CONSTANT_UnresolvedString: {
+        symbolOop sym = unresolved_string_at(idx);
+        char *str = sym->as_utf8();
+        tty->print("JVM_CONSTANT_UnresolvedString: idx=#%03hd, %s", idx, str);
+        break;
+      }
+      case JVM_CONSTANT_Fieldref:
+      case JVM_CONSTANT_Methodref:
+      case JVM_CONSTANT_InterfaceMethodref: {
+        idx1 = uncached_klass_ref_index_at(idx);
+        idx2 = uncached_name_and_type_ref_index_at(idx);
+        tty->print("JVM_CONSTANT_Methodref: %hd %hd", idx1, idx2);
+        break;
+      }
+      case JVM_CONSTANT_NameAndType: {
+        idx1 = name_ref_index_at(idx);
+        idx2 = signature_ref_index_at(idx);
+        tty->print("JVM_CONSTANT_NameAndType: %hd %hd", idx1, idx2);
+        break;
+      }
+      case JVM_CONSTANT_ClassIndex: {
+        idx1 = klass_index_at(idx);
+        tty->print("JVM_CONSTANT_ClassIndex: %hd", idx1);
+        break;
+      }
+      case JVM_CONSTANT_StringIndex: {
+        idx1 = string_index_at(idx);
+        tty->print("JVM_CONSTANT_StringIndex: %hd", idx1);
+        break;
+      }
+    }
+    tty->print_cr("");
+    size  += ent_size;
+  }
+} 
+#else
+void constantPoolOopDesc::print_cpool() {
+}
+#endif
 
 // Copy cpool bytes.
 // Returns:
--- a/hotspot/src/share/vm/oops/constantPoolOop.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/oops/constantPoolOop.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -455,6 +455,7 @@
   int  orig_length() const                { return _orig_length; }
   void set_orig_length(int orig_length)   { _orig_length = orig_length; }
 
+  void print_cpool();
 
   // JVMTI accesss - GetConstantPool, RetransformClasses, ...
   friend class JvmtiConstantPoolReconstituter;
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -87,6 +87,7 @@
   }
 }
 
+static int call_class_initialize_counter = 0;   // for debugging
 
 // See "The Virtual Machine Specification" section 2.16.5 for a detailed explanation of the class initialization
 // process. The step comments refers to the procedure described in that section.
--- a/hotspot/src/share/vm/oops/methodOop.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/oops/methodOop.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -679,7 +679,6 @@
   if (adapter == NULL ) {
     THROW_0(vmSymbols::java_lang_OutOfMemoryError());
   }
-
   mh->set_adapter_entry(adapter);
   mh->_from_compiled_entry = adapter->get_c2i_entry();
   return adapter->get_c2i_entry();
--- a/hotspot/src/share/vm/oops/oop.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/oops/oop.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -29,6 +29,14 @@
 
 BarrierSet* oopDesc::_bs = NULL;
 
+klassOop oopDesc::klass() const {
+  if (UseCompressedOops) {
+    return (klassOop)decode_heap_oop_not_null(_metadata._compressed_klass);
+  } else {
+    return _metadata._klass;
+  }
+}
+
 #ifdef PRODUCT
 void oopDesc::print_on(outputStream* st) const {}
 void oopDesc::print_value_on(outputStream* st) const {}
@@ -95,12 +103,16 @@
 
 void oopDesc::verify_on(outputStream* st) {
   if (this != NULL) {
+    if( blueprint() == (void*)8)
+      tty->print_cr("%s:%d verify_on error %x", __FILE__, __LINE__, this);
     blueprint()->oop_verify_on(this, st);
   }
 }
 
 
-void oopDesc::verify() {
+void oopDesc::verify() { 
+  if( blueprint() == (void*)8)
+    tty->print_cr("%s:%d verify error %x", __FILE__, __LINE__, this);
   verify_on(tty);
 }
 
--- a/hotspot/src/share/vm/oops/oop.inline.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -33,13 +33,6 @@
   return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark);
 }
 
-inline klassOop oopDesc::klass() const {
-  if (UseCompressedOops) {
-    return (klassOop)decode_heap_oop_not_null(_metadata._compressed_klass);
-  } else {
-    return _metadata._klass;
-  }
-}
 
 inline klassOop oopDesc::klass_or_null() const volatile {
   // can be NULL in CMS
--- a/hotspot/src/share/vm/prims/jvm.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/prims/jvm.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -234,6 +234,18 @@
   arrayOop d = arrayOop(JNIHandles::resolve_non_null(dst));
   assert(s->is_oop(), "JVM_ArrayCopy: src not an oop");
   assert(d->is_oop(), "JVM_ArrayCopy: dst not an oop");
+
+  oop p = oop(JNIHandles::resolve_non_null(dst));
+  if(dst_pos == badHeapWordVal || length == badHeapWordVal) {
+    //tty->print_cr("%s:%d: %x-%x  %x-%x %x    %x ", __FILE__, __LINE__, s, d, src_pos, dst_pos, length, p);
+    for(int i = 0; i < 4; i ++) {
+	tty->print("%x ",*((int*)p + i) );
+    }
+    p->print();
+    s->print();
+    d->print();
+  }
+
   // Do copy
   Klass::cast(s->klass())->copy_array(s, src_pos, d, dst_pos, length, thread);
 JVM_END
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/prims/jvmpi.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,3839 @@
+#ifdef USE_PRAGMA_IDENT_SRC
+#pragma ident "@(#)jvmpi.cpp	1.173 06/07/27 11:15:10 JVM"
+#endif
+// 
+// Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+// SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+// 
+
+# include "incls/_precompiled.incl"
+# include "incls/_jvmpi.cpp.incl"
+
+
+//-------------------------------------------------------
+
+// Unsolved problems:
+//
+// CPU profiling
+// - we need an exact mapping of pc/bci to lineNo; this is not
+//   always possible as:
+//    - interpreted has not set up the frame completely
+//    - compiled code is not at a safepoint
+
+//-------------------------------------------------------
+
+
+// define raw monitor validity checking
+static int PROF_RM_MAGIC = (int)(('P' << 24) | ('I' << 16) | ('R' << 8) | 'M');
+#define PROF_RM_CHECK(m)                                 \
+    ((m) != NULL && (m)->magic() == PROF_RM_MAGIC) 
+
+
+unsigned int jvmpi::_event_flags = JVMPI_PROFILING_OFF;
+unsigned int jvmpi::_event_flags_array[JVMPI_MAX_EVENT_TYPE_VAL + 1];
+JVMPI_Interface jvmpi::jvmpi_interface;
+bool jvmpi::slow_allocation = false;
+
+class VM_JVMPIPostObjAlloc: public VM_Operation {
+ private:
+  static volatile bool _restrict_event_posting;
+  static Thread *      _create_thread;
+
+ public:
+  VM_JVMPIPostObjAlloc() {
+    // local fields are initialized when declared
+  }
+
+  ~VM_JVMPIPostObjAlloc() {
+    clear_restriction();
+  }
+
+  static void clear_restriction();
+  static const Thread *create_thread() {
+    return _create_thread;
+  }
+  void doit();
+  const char* name() const {
+    return "post JVMPI object allocation";
+  }
+  static void set_create_thread(Thread *thr) {
+    _create_thread = thr;
+  }
+  static void wait_if_restricted();
+};
+
+
+// JVMPI_EVENT_THREAD_START events can be posted by the thread itself
+// or by the VM initialization thread which can result in duplicate
+// THREAD_START events. Duplicate events are not acceptable unless the
+// event is specifically requested. We track all threads that have
+// posted THREAD_START events until after any needed synthetic events
+// from jvmpi::post_vm_initialization_events() are posted.
+//
+// The global thread_start_posted_list is protected by the Threads_lock.
+// Since both real and synthetic THREAD_START event posters use
+// jvmpi::post_thread_start_event() we will have no dups.
+//
+// We could have used a flag in the JavaThread, but why take up space
+// to solve a race that only happens during VM initialization? We also
+// want to isolate this bit of JVM/PI strangeness here.
+
+static GrowableArray<JavaThread *>* thread_start_posted_list = NULL;
+static volatile bool track_thread_start_events = true;
+
+// Returns true if a THREAD_START event has already been posted
+// for this thread.
+static bool check_for_and_record_thread_start_event(JavaThread* thread) {
+  assert(Threads_lock->owned_by_self(), "sanity check");
+
+  if (track_thread_start_events) {
+    // we are still in the race condition region
+
+    if (thread_start_posted_list == NULL) {
+      // first thread to post THREAD_START event so setup initial space
+      thread_start_posted_list =
+        new (ResourceObj::C_HEAP) GrowableArray<JavaThread*>(3, true);
+    }
+  
+    if (thread_start_posted_list->contains(thread)) {
+      // real or synthetic event already posted; don't post another
+      return true;
+    }
+  
+    // record this thread and let real or synthetic event be posted
+    thread_start_posted_list->append(thread);
+    return false;
+  }
+
+  // We are past the point where synthetic events can be posted. No
+  // need to record threads anymore, but we have to check against the
+  // history to see if an event was posted before.
+
+  if (thread_start_posted_list == NULL) {
+    // tracking list was not setup so let the thread post its own event
+    return false;
+  }
+
+  if (thread_start_posted_list->contains(thread)) {
+    // synthetic event already posted; don't post another
+    return true;
+  }
+
+  // let thread post its own event
+  return false;
+} // end check_for_and_record_thread_start_event()
+
+
+static void stop_tracking_thread_start_events() {
+  track_thread_start_events = false;
+}
+
+
+void jvmpi::initialize(int version) {
+  // Exit with an error if we are using a jvmpi-incompatible garbage collector,
+  // unless explicitly overridden via JVMPICheckGCCompatibility (needed for using
+  // Analyzer with these non-jvmpi collectors; see bug 4889433).
+  if (JVMPICheckGCCompatibility && 
+      (UseConcMarkSweepGC || UseParNewGC || UseParallelGC)) {
+    vm_exit_during_initialization(
+      "JVMPI not supported with this garbage collector; "
+      "please refer to the GC/JVMPI documentation");
+  }
+
+  // The creating thread requests the VM_JVMPIPostObjAlloc VM operation,
+  // so the restriction should not apply to its events (if any).
+  VM_JVMPIPostObjAlloc::set_create_thread(ThreadLocalStorage::thread());
+
+  // Enable JVMPI
+  _event_flags |= JVMPI_PROFILING_ON;
+
+  // First, initialize all JVMPI defined event notifications 
+  // to be not available
+  for (int i= 0; i <= JVMPI_MAX_EVENT_TYPE_VAL; i++) {
+    _event_flags_array[i] = JVMPI_EVENT_NOT_SUPPORTED;
+  }
+
+  // Then, initialize events supported by the HotSpot VM
+  // to be initially disabled.
+  disable_event(JVMPI_EVENT_CLASS_LOAD);
+  disable_event(JVMPI_EVENT_CLASS_UNLOAD);
+  disable_event(JVMPI_EVENT_CLASS_LOAD_HOOK);
+  disable_event(JVMPI_EVENT_OBJECT_ALLOC);
+  disable_event(JVMPI_EVENT_OBJECT_FREE);
+  // JVMPI_VERSION_1_1 is upward compatible from JVMPI_VERSION_1, so the
+  // INSTRUCTION_START event is also supported here (initially disabled).
+  disable_event(JVMPI_EVENT_INSTRUCTION_START);
+  disable_event(JVMPI_EVENT_THREAD_START);
+  disable_event(JVMPI_EVENT_THREAD_END);
+  disable_event(JVMPI_EVENT_JNI_GLOBALREF_ALLOC);
+  disable_event(JVMPI_EVENT_JNI_GLOBALREF_FREE);
+  disable_event(JVMPI_EVENT_JNI_WEAK_GLOBALREF_ALLOC);
+  disable_event(JVMPI_EVENT_JNI_WEAK_GLOBALREF_FREE);
+  disable_event(JVMPI_EVENT_METHOD_ENTRY);
+  disable_event(JVMPI_EVENT_METHOD_ENTRY2);
+  disable_event(JVMPI_EVENT_METHOD_EXIT);
+  disable_event(JVMPI_EVENT_LOAD_COMPILED_METHOD);
+  disable_event(JVMPI_EVENT_UNLOAD_COMPILED_METHOD);
+  disable_event(JVMPI_EVENT_JVM_INIT_DONE);
+  disable_event(JVMPI_EVENT_JVM_SHUT_DOWN);
+  disable_event(JVMPI_EVENT_DUMP_DATA_REQUEST);
+  disable_event(JVMPI_EVENT_RESET_DATA_REQUEST);
+  disable_event(JVMPI_EVENT_OBJECT_MOVE);
+  disable_event(JVMPI_EVENT_ARENA_NEW);
+  disable_event(JVMPI_EVENT_DELETE_ARENA);
+  disable_event(JVMPI_EVENT_RAW_MONITOR_CONTENDED_ENTER);
+  disable_event(JVMPI_EVENT_RAW_MONITOR_CONTENDED_ENTERED);
+  disable_event(JVMPI_EVENT_RAW_MONITOR_CONTENDED_EXIT);
+  disable_event(JVMPI_EVENT_MONITOR_CONTENDED_ENTER);
+  disable_event(JVMPI_EVENT_MONITOR_CONTENDED_ENTERED);
+  disable_event(JVMPI_EVENT_MONITOR_CONTENDED_EXIT);
+  disable_event(JVMPI_EVENT_MONITOR_WAIT);
+  disable_event(JVMPI_EVENT_MONITOR_WAITED);
+  disable_event(JVMPI_EVENT_GC_START);
+  disable_event(JVMPI_EVENT_GC_FINISH);
+
+  // return highest upward compatible version number
+  if (UseSuspendResumeThreadLists) {
+    jvmpi_interface.version               = JVMPI_VERSION_1_2;
+  } else {
+    jvmpi_interface.version               = JVMPI_VERSION_1_1;
+  }
+  
+  // initialize the jvmpi_interface functions
+  jvmpi_interface.EnableEvent             = &enable_event;
+  jvmpi_interface.DisableEvent            = &disable_event;
+  
+  jvmpi_interface.RequestEvent            = &request_event;
+  jvmpi_interface.GetCallTrace            = &get_call_trace;
+  jvmpi_interface.GetCurrentThreadCpuTime = &get_current_thread_cpu_time;
+  jvmpi_interface.ProfilerExit            = &profiler_exit;
+  jvmpi_interface.RawMonitorCreate        = &raw_monitor_create;
+  jvmpi_interface.RawMonitorEnter         = &raw_monitor_enter;
+  jvmpi_interface.RawMonitorExit          = &raw_monitor_exit;
+  jvmpi_interface.RawMonitorWait          = &raw_monitor_wait;
+  jvmpi_interface.RawMonitorNotifyAll     = &raw_monitor_notify_all;
+  jvmpi_interface.RawMonitorDestroy       = &raw_monitor_destroy;
+  jvmpi_interface.SuspendThread           = &suspend_thread;
+  jvmpi_interface.ResumeThread            = &resume_thread;
+  jvmpi_interface.GetThreadStatus         = &get_thread_status;
+  jvmpi_interface.ThreadHasRun            = &thread_has_run;
+  jvmpi_interface.CreateSystemThread      = &create_system_thread;
+  jvmpi_interface.SetThreadLocalStorage   = &set_thread_local_storage;
+  jvmpi_interface.GetThreadLocalStorage   = &get_thread_local_storage;
+  
+  jvmpi_interface.DisableGC               = &disable_gc;
+  jvmpi_interface.EnableGC                = &enable_gc;
+  
+  jvmpi_interface.RunGC                   = &run_gc;
+  jvmpi_interface.GetThreadObject         = &get_thread_object;
+  jvmpi_interface.GetMethodClass          = &get_method_class;
+
+  // JVMPI_VERSION_1_1 is upward compatible from JVMPI_VERSION_1 so set
+  // up function pointers for jobjectID2jobject and jobject2jobjectID
+  jvmpi_interface.jobjectID2jobject     = &jobjectID_2_jobject;
+  jvmpi_interface.jobject2jobjectID     = &jobject_2_jobjectID;
+
+  // JVMPI_VERSION_1_2 is upward compatible from previous versions, but
+  // it can be disabled via the UseSuspendResumeThreadLists option.
+  if (UseSuspendResumeThreadLists) {
+    jvmpi_interface.SuspendThreadList     = &suspend_thread_list;
+    jvmpi_interface.ResumeThreadList      = &resume_thread_list;
+  } else {
+    jvmpi_interface.SuspendThreadList     = NULL;
+    jvmpi_interface.ResumeThreadList      = NULL;
+  }
+}
+
+
+JVMPI_Interface* jvmpi::GetInterface_1(int version) {
+  initialize(version);
+  return &jvmpi_interface;
+}
+
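+// Usage note (illustrative only, not part of the VM sources): a profiler
+// agent normally reaches GetInterface_1() indirectly, by asking the JavaVM
+// for the JVMPI interface from its JVM_OnLoad entry point. The sketch below
+// assumes the standard JVMPI agent entry point and a hypothetical
+// my_notify_event handler.
+//
+//   extern "C" JNIEXPORT jint JNICALL
+//   JVM_OnLoad(JavaVM* jvm, char* options, void* reserved) {
+//     JVMPI_Interface* jvmpi;
+//     // GetEnv routes JVMPI version requests to jvmpi::GetInterface_1()
+//     if (jvm->GetEnv((void**)&jvmpi, JVMPI_VERSION_1) != JNI_OK) {
+//       return JNI_ERR;
+//     }
+//     jvmpi->NotifyEvent = my_notify_event;          // agent's event callback
+//     jvmpi->EnableEvent(JVMPI_EVENT_CLASS_LOAD, NULL);
+//     return JNI_OK;                                 // let the VM continue startup
+//   }
+//
+// The version field set above (JVMPI_VERSION_1_1 or _1_2) tells the agent
+// which optional entries, such as SuspendThreadList, are available.
+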
+static void _pass()     { return; }
+static void _block()    { 
+  while (true) {
+    VM_Exit::block_if_vm_exited();
+
+    // VM has not yet reached final safepoint, but it will get there very soon
+    Thread *thr = ThreadLocalStorage::get_thread_slow();
+    if (thr) os::yield_all(100);     // yield_all() needs a thread on Solaris
+  }
+}
+
+// disable JVMPI - this is called during VM shutdown, after the
+// JVM_SHUT_DOWN event.
+void jvmpi::disengage() {
+  _event_flags = JVMPI_PROFILING_OFF;
+  
+  address block_func = CAST_FROM_FN_PTR(address, _block);
+  address pass_func  = CAST_FROM_FN_PTR(address, _pass);
+
+  // replace most JVMPI interface functions with infinite loops
+  jvmpi_interface.EnableEvent =
+      CAST_TO_FN_PTR(jint(*)(jint, void*), block_func);
+  jvmpi_interface.DisableEvent =
+      CAST_TO_FN_PTR(jint(*)(jint, void*), block_func);
+  jvmpi_interface.RequestEvent =
+      CAST_TO_FN_PTR(jint(*)(jint, void*), block_func);
+  jvmpi_interface.GetCallTrace =
+      CAST_TO_FN_PTR(void(*)(JVMPI_CallTrace*, jint), block_func);
+  jvmpi_interface.GetCurrentThreadCpuTime =
+      CAST_TO_FN_PTR(jlong(*)(void), block_func);
+  // allow ProfilerExit to go through
+  // jvmpi_interface.ProfilerExit = CAST_TO_FN_PTR(void(*)(jint), block_func);
+  jvmpi_interface.RawMonitorCreate =
+      CAST_TO_FN_PTR(JVMPI_RawMonitor(*)(char*), block_func);
+  jvmpi_interface.RawMonitorEnter =
+      CAST_TO_FN_PTR(void(*)(JVMPI_RawMonitor), block_func);
+  jvmpi_interface.RawMonitorExit =
+      CAST_TO_FN_PTR(void(*)(JVMPI_RawMonitor), block_func);
+  jvmpi_interface.RawMonitorWait =
+      CAST_TO_FN_PTR(void(*)(JVMPI_RawMonitor, jlong), block_func);
+  jvmpi_interface.RawMonitorNotifyAll =
+      CAST_TO_FN_PTR(void(*)(JVMPI_RawMonitor), block_func);
+  jvmpi_interface.RawMonitorDestroy =
+      CAST_TO_FN_PTR(void(*)(JVMPI_RawMonitor), block_func);
+  jvmpi_interface.SuspendThread =
+      CAST_TO_FN_PTR(void(*)(JNIEnv*), block_func);
+  jvmpi_interface.ResumeThread =
+      CAST_TO_FN_PTR(void(*)(JNIEnv*), block_func);
+  jvmpi_interface.GetThreadStatus = 
+      CAST_TO_FN_PTR(jint(*)(JNIEnv*), block_func);
+  jvmpi_interface.ThreadHasRun =
+      CAST_TO_FN_PTR(jboolean(*)(JNIEnv*), block_func);
+  jvmpi_interface.CreateSystemThread =
+      CAST_TO_FN_PTR(jint(*)(char*, jint, jvmpi_void_function_of_void), block_func);
+  // Allow GetThreadLocalStorage() and SetThreadLocalStorage() to
+  // go through since they simply manage thread local storage.
+  // jvmpi_interface.SetThreadLocalStorage =
+  //     CAST_TO_FN_PTR(void(*)(JNIEnv*, void*), block_func);
+  // jvmpi_interface.GetThreadLocalStorage = 
+  //     CAST_TO_FN_PTR(void*(*)(JNIEnv*), block_func);
+  jvmpi_interface.DisableGC =
+      CAST_TO_FN_PTR(void(*)(void), block_func);
+  jvmpi_interface.EnableGC = 
+      CAST_TO_FN_PTR(void(*)(void), block_func);
+  jvmpi_interface.RunGC =
+      CAST_TO_FN_PTR(void(*)(void), block_func);
+  jvmpi_interface.GetThreadObject =
+      CAST_TO_FN_PTR(jobjectID(*)(JNIEnv*), block_func);
+  jvmpi_interface.GetMethodClass =
+      CAST_TO_FN_PTR(jobjectID(*)(jmethodID), block_func);
+  jvmpi_interface.jobjectID2jobject = 
+      CAST_TO_FN_PTR(jobject(*)(jobjectID), block_func);
+  jvmpi_interface.jobject2jobjectID =
+      CAST_TO_FN_PTR(jobjectID(*)(jobject), block_func);
+
+  // NotifyEvent() is called from VM, do not block
+  jvmpi_interface.NotifyEvent =
+      CAST_TO_FN_PTR(void(*)(JVMPI_Event*), pass_func);
+}
+
+inline void jvmpi::post_event_common(JVMPI_Event* event) {
+
+  // Check for restrictions related to the VM_JVMPIPostObjAlloc VM
+  // operation. JavaThreads will wait here if the VM operation is
+  // in process in order to prevent deadlock.
+  VM_JVMPIPostObjAlloc::wait_if_restricted();
+
+  // notify profiler agent
+  jvmpi_interface.NotifyEvent(event);
+}
+
+inline void jvmpi::post_event(JVMPI_Event* event) {
+  Thread* thread = Thread::current();
+  assert(thread->is_Java_thread(), "expecting a Java thread");
+
+  JavaThread* jthread = (JavaThread*)thread;
+  event->env_id = jthread->jni_environment();
+  // prepare to call out across JVMPI
+  ThreadToNativeFromVM transition(jthread);
+  HandleMark  hm(thread);  
+  // notify profiler agent
+  post_event_common(event);
+}
+
+// JVMPI 2.0: should cleanup race condition where calling_thread
+// exits before being notified.
+inline void jvmpi::post_event_vm_mode(JVMPI_Event* event, JavaThread* calling_thread) {
+  Thread* thread = Thread::current();
+  if (thread->is_Java_thread()) {
+    // JVMPI doesn't do proper transitions on RAW_ENTRY.
+    // When it does, this can be enabled.
+#ifdef PROPER_TRANSITIONS
+    assert(((JavaThread*)thread)->thread_state() == _thread_in_vm, "Only vm mode expected");
+    post_event(event);
+#else
+    JavaThread* jthread = (JavaThread*)thread;
+    JavaThreadState saved_state = jthread->thread_state();
+
+    if (saved_state == _thread_in_vm) {
+      // same as conditions for post_event() so use it
+      post_event(event);
+      return;
+    }
+
+    // We are about to transition to _thread_in_native. See if there
+    // is an external suspend requested before we go. If there is,
+    // then we do a self-suspend. We don't need to do this for
+    // post_event() because it uses ThreadToNativeFromVM.
+
+    if (jthread->is_external_suspend_with_lock()) {
+      jthread->java_suspend_self();
+    }
+
+    event->env_id = jthread->jni_environment();
+    // prepare to call out across JVMPI
+    jthread->frame_anchor()->make_walkable(jthread);
+    if (saved_state == _thread_in_Java) {
+      ThreadStateTransition::transition_from_java(jthread, _thread_in_native);
+    } else if (saved_state != _thread_in_native) {
+      // Nested events are already in _thread_in_native and don't need
+      // to transition again.
+      ThreadStateTransition::transition(jthread, saved_state, _thread_in_native);
+    }
+    HandleMark  hm(thread);  
+    // notify profiler agent
+    post_event_common(event);
+    // restore state prior to posting event
+    ThreadStateTransition::transition_from_native(jthread, saved_state); 
+#endif /* PROPER_TRANSITIONS */
+  } else {
+    if (thread->is_VM_thread()) {
+      // calling from VM thread
+
+      if (calling_thread == NULL) {
+	  calling_thread = JavaThread::active();
+      }	  
+	    
+      assert(calling_thread != NULL && calling_thread->is_Java_thread(),
+	     "wrong thread, expecting Java thread");
+
+      event->env_id = (calling_thread != NULL && 
+		       calling_thread->is_Java_thread()) ?
+		       calling_thread->jni_environment() : NULL;
+    } else {
+      event->env_id = calling_thread->jni_environment();
+    }
+    // notify profiler agent
+    post_event_common(event);
+  }
+}
+
+
+// ----------------------------------------------------------
+// Functions called by other parts of the VM to notify events
+// ----------------------------------------------------------
+
+void issue_jvmpi_class_load_event(klassOop k) {
+  jvmpi::post_class_load_event(Klass::cast(k)->java_mirror());
+}
+
+
+class IssueJVMPIobjAllocEvent: public ObjectClosure {
+ public:
+  void do_object(oop obj) {
+    Universe::jvmpi_object_alloc(obj, obj->size() * wordSize);
+  };
+};
+
+volatile bool VM_JVMPIPostObjAlloc::_restrict_event_posting = true;
+Thread *      VM_JVMPIPostObjAlloc::_create_thread = NULL;
+
+void VM_JVMPIPostObjAlloc::clear_restriction() {
+  // See MutexLockerEx comment in wait_if_restricted().
+  MutexLockerEx loap(ObjAllocPost_lock, Mutex::_no_safepoint_check_flag);
+
+  // Lower restriction since we are done with the VM operation
+  _restrict_event_posting = false;
+
+  // let any waiting threads resume
+  ObjAllocPost_lock->notify_all();
+}
+
+void VM_JVMPIPostObjAlloc::doit() {
+  // Issue object allocation events for all allocated objects
+  IssueJVMPIobjAllocEvent blk;
+
+  // make sure the heap's parseable before iterating over it
+  Universe::heap()->ensure_parseability();
+  Universe::heap()->object_iterate(&blk);
+}
+
+// The restriction is true by default to allow wait_if_restricted()
+// to query the value without holding the lock. This imposes the
+// least overhead on later calls to post_event_common(). Since any
+// event handler can request an OBJ_ALLOC event, we have to restrict
+// all other events until the VMThread is done with its housekeeping.
+//
+// There are five cases to consider:
+// 1) The VMThread calls wait_if_restricted() as part of its
+//    OBJ_ALLOC posting. It will not grab the lock and will not
+//    block due to the second if-statement.
+// 2) The JavaThread that will eventually make this VM operation
+//    request calls wait_if_restricted() before the VM op is created.
+//    It will not grab the lock and will not block due to the second
+//    if-statement.
+//
+// The remaining cases apply to JavaThreads that are not making this
+// VM operation request.
+//
+// 3) A JavaThread that calls wait_if_restricted() before the VM
+//    op is created will always grab the lock and then enter the
+//    check-and-wait loop.
+// 4) A JavaThread that calls wait_if_restricted() after the VM
+//    op is created but before it is finished will always grab the
+//    lock and then enter the check-and-wait loop.
+// 5) A JavaThread that calls wait_if_restricted() after the VM
+//    op is finished will see the false value and will not block.
+//
+// If the restriction is false by default and then set to true in
+// the VM op constructor, then we have to guard the query with a
+// lock grab to prevent a race between the JavaThread and the
+// VMThread. Without the lock grab, it would be possible for the
+// JavaThread to see the "false" value just before the constructor
+// sets the value to true. At that point, the JavaThread
+// would be racing to finish its event posting before the VMThread
+// blocks it in a safepoint.
+//
+void VM_JVMPIPostObjAlloc::wait_if_restricted() {
+  if (_restrict_event_posting) {
+    // a restriction has been raised
+
+    // The restriction does not apply to the VMThread nor does it
+    // apply to the thread that makes the VM_JVMPIPostObjAlloc
+    // VM operation request.
+    Thread *thr = ThreadLocalStorage::thread();
+    if (thr != NULL && !thr->is_VM_thread() && thr != create_thread()) {
+      // The restriction applies to this thread. We use
+      // MutexLockerEx to allow the lock to work just
+      // before calling into the agent's code (native).
+      MutexLockerEx loap(ObjAllocPost_lock, Mutex::_no_safepoint_check_flag);
+      while (_restrict_event_posting) {
+        ObjAllocPost_lock->wait(Mutex::_no_safepoint_check_flag, 0);
+      }
+    }
+  }
+}
+
+void jvmpi::post_vm_initialization_events() {
+  if (Universe::jvmpi_alloc_event_enabled()) {
+    // Issue the object allocation events thru a VM operation since
+    // it needs to be done at a safepoint
+    VM_JVMPIPostObjAlloc op;
+    VMThread::execute(&op);
+  } else {
+    // lift the restriction since we didn't do the VM operation
+    VM_JVMPIPostObjAlloc::clear_restriction();
+  }
+
+  if (!jvmpi::enabled()) {
+    // no agent is attached and the event posting restriction is now
+    // lifted so there is nothing more to do
+
+    // We don't conditionally enable the THREAD_START tracking mechanism
+    // so we always have to disable it.
+    stop_tracking_thread_start_events();
+    return;
+  }
+
+  assert(!JVMPICheckGCCompatibility ||
+         !(UseConcMarkSweepGC || UseParNewGC || UseParallelGC),
+         "JVMPI-incompatible collector; jvm should have exited during "
+         "JVMPI initialization");
+
+  if (jvmpi::is_event_enabled(JVMPI_EVENT_CLASS_LOAD)) {
+    // Issue class load events for all loaded classes
+    // Note: This must happen _after_ the allocation events, otherwise hprof has problems!
+    SystemDictionary::classes_do(&issue_jvmpi_class_load_event);
+  }
+
+  if (jvmpi::is_event_enabled(JVMPI_EVENT_THREAD_START)) {
+    // Issue thread creation events for all started threads
+    int k = 0;
+    int threadcount;
+    JavaThread** ThreadSnapShot;
+    { MutexLocker mu(Threads_lock);
+      threadcount = Threads::number_of_threads();
+      ThreadSnapShot = NEW_C_HEAP_ARRAY(JavaThread*, threadcount);
+      for (JavaThread* tp = Threads::first() ; (tp != NULL) && ( k < threadcount); tp = tp->next(), k++) {
+        ThreadSnapShot[k] = tp;
+      }
+    } // Release Threads_lock before calling up to agent code
+    for (k = 0; k<threadcount; k++) {
+      jvmpi::post_thread_start_event(ThreadSnapShot[k]);
+    }
+    FREE_C_HEAP_ARRAY(JavaThread*, ThreadSnapShot);
+  }
+
+  // we are now past the point of posting synthetic THREAD_START events
+  stop_tracking_thread_start_events();
+}
+
+
+void jvmpi::post_vm_initialized_event() {
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_JVM_INIT_DONE;
+  post_event(&event);
+}
+
+void jvmpi::post_vm_death_event() {
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_JVM_SHUT_DOWN;
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_instruction_start_event(const frame& f) {
+  ResourceMark rm;
+  JVMPI_Event event;
+
+  methodOop method = f.interpreter_frame_method();
+  address   bcp    = f.interpreter_frame_bcp();
+
+  // fill in generic information
+  event.event_type = JVMPI_EVENT_INSTRUCTION_START;
+  event.u.instruction.method_id = method->jmethod_id();
+  event.u.instruction.offset    = method->bci_from(bcp);
+
+  // debugging
+#ifdef ASSERT
+  switch (Bytecodes::java_code(Bytecodes::cast(*bcp))) {
+    case Bytecodes::_tableswitch : // fall through
+    case Bytecodes::_lookupswitch: // fall through
+    case Bytecodes::_ifnull      : // fall through
+    case Bytecodes::_ifeq        : // fall through
+    case Bytecodes::_ifnonnull   : // fall through
+    case Bytecodes::_ifne        : // fall through
+    case Bytecodes::_iflt        : // fall through
+    case Bytecodes::_ifge        : // fall through
+    case Bytecodes::_ifgt        : // fall through
+    case Bytecodes::_ifle        : assert(f.interpreter_frame_expression_stack_size() >= 1, "stack size must be >= 1"); break;
+    case Bytecodes::_if_acmpeq   : // fall through
+    case Bytecodes::_if_icmpeq   : // fall through
+    case Bytecodes::_if_acmpne   : // fall through
+    case Bytecodes::_if_icmpne   : // fall through
+    case Bytecodes::_if_icmplt   : // fall through
+    case Bytecodes::_if_icmpge   : // fall through
+    case Bytecodes::_if_icmpgt   : // fall through
+    case Bytecodes::_if_icmple   : assert(f.interpreter_frame_expression_stack_size() >= 2, "stack size must be >= 2"); break;
+  }
+#endif
+
+  // fill in bytecode-specific information
+  //
+  // Note: This code is necessary to satisfy the current interface for the jcov
+  //       code coverage tool. The interface should be simplified and generalized
+  //       to provide expression stack access instead of specific information for
+  //       a few bytecodes only. Given expression stack access, the code below
+  //       can move into jcov, the interface becomes simpler, more general, and
+  //       also more powerful. With the next version/revision of JVMPI this clean
+  //       up should be seriously considered (gri 11/18/99).
+
+  int  size  = f.interpreter_frame_expression_stack_size();
+  jint tos_0 = size > 0 ? *f.interpreter_frame_expression_stack_at(size - 1) : 0;
+  jint tos_1 = size > 1 ? *f.interpreter_frame_expression_stack_at(size - 2) : 0;
+
+  switch (Bytecodes::java_code(Bytecodes::cast(*bcp))) {
+    case Bytecodes::_tableswitch :
+      { const Bytecode_tableswitch* s = Bytecode_tableswitch_at(bcp);
+        event.u.instruction.u.tableswitch_info.key = tos_0;
+        event.u.instruction.u.tableswitch_info.low = s->low_key();
+        event.u.instruction.u.tableswitch_info.hi  = s->high_key();
+      }
+      break;
+    case Bytecodes::_lookupswitch:
+      { Bytecode_lookupswitch* s = Bytecode_lookupswitch_at(bcp);
+        int i;
+        for (i = 0; i < s->number_of_pairs() && tos_0 != s->pair_at(i)->match(); i++);
+        event.u.instruction.u.lookupswitch_info.chosen_pair_index = i;
+        event.u.instruction.u.lookupswitch_info.pairs_total       = s->number_of_pairs();
+      }
+      break;
+    case Bytecodes::_ifnull      : // fall through
+    case Bytecodes::_ifeq        : event.u.instruction.u.if_info.is_true = tos_0 == 0; break;
+    case Bytecodes::_ifnonnull   : // fall through
+    case Bytecodes::_ifne        : event.u.instruction.u.if_info.is_true = tos_0 != 0; break;
+    case Bytecodes::_iflt        : event.u.instruction.u.if_info.is_true = tos_0 <  0; break;
+    case Bytecodes::_ifge        : event.u.instruction.u.if_info.is_true = tos_0 >= 0; break;
+    case Bytecodes::_ifgt        : event.u.instruction.u.if_info.is_true = tos_0 >  0; break;
+    case Bytecodes::_ifle        : event.u.instruction.u.if_info.is_true = tos_0 <= 0; break;
+    case Bytecodes::_if_acmpeq   : // fall through
+    case Bytecodes::_if_icmpeq   : event.u.instruction.u.if_info.is_true = tos_1 == tos_0; break;
+    case Bytecodes::_if_acmpne   : // fall through
+    case Bytecodes::_if_icmpne   : event.u.instruction.u.if_info.is_true = tos_1 != tos_0; break;
+    case Bytecodes::_if_icmplt   : event.u.instruction.u.if_info.is_true = tos_1 <  tos_0; break;
+    case Bytecodes::_if_icmpge   : event.u.instruction.u.if_info.is_true = tos_1 >= tos_0; break;
+    case Bytecodes::_if_icmpgt   : event.u.instruction.u.if_info.is_true = tos_1 >  tos_0; break;
+    case Bytecodes::_if_icmple   : event.u.instruction.u.if_info.is_true = tos_1 <= tos_0; break;
+  }
+  
+  post_event(&event);
+}
+
+void jvmpi::post_thread_start_event(JavaThread* thread, jint flag)
+{
+  ResourceMark rm;
+  JVMPI_Event event;
+  
+  assert(!Threads_lock->owned_by_self(), "must not own threads_lock for notify");
+
+  { MutexLocker mu(Threads_lock);
+
+    // Do not post thread start event for hidden java thread.
+    if (thread->is_hidden_from_external_view()) return;
+
+    if (flag != JVMPI_REQUESTED_EVENT &&
+        check_for_and_record_thread_start_event(thread)) {
+      // Prevent duplicate THREAD_START events unless there is a
+      // specific request for the event.
+      return;
+    }
+
+    event.event_type = JVMPI_EVENT_THREAD_START | flag;
+  
+    event.u.thread_start.thread_name   = (char*)thread->get_thread_name();
+    event.u.thread_start.group_name    = (char*)thread->get_threadgroup_name();
+    event.u.thread_start.parent_name   = (char*)thread->get_parent_name();
+    event.u.thread_start.thread_id     = (jobjectID)thread->threadObj();
+    event.u.thread_start.thread_env_id = thread->jni_environment();
+  } // Release Threads_lock
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: post_thread_start_event for thread id " INTPTR_FORMAT " [thread " INTPTR_FORMAT " <%s>] ",
+		  event.u.thread_start.thread_id, thread, event.u.thread_start.thread_name);
+  }
+  
+  GC_locker::lock();
+  post_event_vm_mode(&event, NULL);
+  GC_locker::unlock();
+}
+
+void jvmpi::post_thread_start_event(JavaThread* thread) {
+  post_thread_start_event(thread, 0);
+}
+
+void jvmpi::post_thread_end_event(JavaThread* thread) {
+  ResourceMark rm;
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_THREAD_END;
+
+  { MutexLocker mu(Threads_lock);
+
+    // Do not post thread end event for hidden java thread.
+    if (thread->is_hidden_from_external_view()) return;
+
+    event.u.thread_start.thread_name   = (char*)thread->get_thread_name();
+    event.u.thread_start.group_name    = (char*)thread->get_threadgroup_name();
+    event.u.thread_start.parent_name   = (char*)thread->get_parent_name();
+    event.u.thread_start.thread_id     = (jobjectID)thread->threadObj();
+    event.u.thread_start.thread_env_id = thread->jni_environment();
+  } // Release Threads_lock
+
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: post_thread_end_event for thread id " INTPTR_FORMAT " [thread " INTPTR_FORMAT " <%s>] ", 
+		  event.u.thread_start.thread_id, thread, event.u.thread_start.thread_name);
+  }
+  post_event(&event);
+}
+
+void jvmpi::fillin_array_class_load_event(oop kOop, JVMPI_Event *eventp) {
+  Klass *k = Klass::cast(java_lang_Class::as_klassOop(kOop));
+  assert(k->oop_is_array(), "must be array classes");
+
+  eventp->event_type                       = JVMPI_EVENT_CLASS_LOAD;
+  eventp->u.class_load.class_name          = k->external_name();
+  eventp->u.class_load.source_name         = NULL;
+  eventp->u.class_load.num_interfaces      = 0; 
+  eventp->u.class_load.num_methods         = 0;
+  eventp->u.class_load.methods             = NULL;
+  eventp->u.class_load.num_static_fields   = 0;
+  eventp->u.class_load.statics             = NULL;
+  eventp->u.class_load.num_instance_fields = 0;
+  eventp->u.class_load.instances           = NULL;
+  eventp->u.class_load.class_id            = (jobjectID)kOop;
+}
+
+// Note: kOop must be mirror
+void jvmpi::fillin_class_load_event(oop kOop, JVMPI_Event *eventp, bool fillin_jni_ids) {
+  eventp->event_type = JVMPI_EVENT_CLASS_LOAD;
+  instanceKlassHandle k = java_lang_Class::as_klassOop(kOop);
+  assert(!k()->klass_part()->oop_is_array(), "must not be array classes");
+
+  instanceKlass* ik = instanceKlass::cast(k());
+  // get field info
+  int num_statics = 0;
+  int num_instances = 0;
+  for (FieldStream count_field_st(k, true, true); !count_field_st.eos(); count_field_st.next()) {
+    if (count_field_st.access_flags().is_static()) {
+      num_statics++;
+    } else {
+      num_instances++;
+    }
+  }
+  JVMPI_Field* statics = NEW_RESOURCE_ARRAY(JVMPI_Field, num_statics);
+  JVMPI_Field* instances = NEW_RESOURCE_ARRAY(JVMPI_Field, num_instances);
+  int i_stat = 0;
+  int i_inst = 0;
+  for (FieldStream field_st(k, true, true); !field_st.eos(); field_st.next()) {
+    char* f_name = field_st.name     ()->as_C_string();
+    char* f_sig  = field_st.signature()->as_C_string();
+    if (field_st.access_flags().is_static()) {
+      statics[i_stat].field_name      = f_name;
+      statics[i_stat].field_signature = f_sig;
+      i_stat++;
+    } else {
+      instances[i_inst].field_name      = f_name;
+      instances[i_inst].field_signature = f_sig;
+      i_inst++;
+    }
+  }
+  assert(i_inst == num_instances, "sanity check");
+  assert(i_stat == num_statics, "sanity check");
+  // get method info
+  int num_methods = ik->methods()->length();
+  JVMPI_Method* methods = NEW_RESOURCE_ARRAY(JVMPI_Method, num_methods);
+  int i_meth = 0;
+  for (MethodStream meth_st(k, true, true); !meth_st.eos(); meth_st.next()) {
+    methodOop m = meth_st.method();
+    methods[i_meth].method_name      = m->name()->as_C_string();
+    methods[i_meth].method_signature = m->signature()->as_C_string();
+    if (fillin_jni_ids) {
+      methods[i_meth].method_id      = m->jmethod_id();
+    } else {
+      // method_id doesn't mean much after class is unloaded
+      methods[i_meth].method_id      = NULL;
+    }
+    methods[i_meth].start_lineno     = m->line_number_from_bci(0);
+    if (m->code_size() > 0) {
+      methods[i_meth].end_lineno     = m->line_number_from_bci(m->code_size() - 1);
+    } else {
+      methods[i_meth].end_lineno     = m->line_number_from_bci(0);
+    }
+    i_meth++;
+  }
+
+  eventp->u.class_load.class_name          = ik->external_name();
+  if (ik->source_file_name() == NULL)
+    eventp->u.class_load.source_name       = NULL;
+  else
+    eventp->u.class_load.source_name       = ik->source_file_name()->as_C_string();
+  eventp->u.class_load.num_interfaces      = ik->local_interfaces()->length();
+  eventp->u.class_load.num_methods         = num_methods;
+  eventp->u.class_load.methods             = methods;
+  eventp->u.class_load.num_static_fields   = num_statics;
+  eventp->u.class_load.statics             = statics;
+  eventp->u.class_load.num_instance_fields = num_instances;
+  eventp->u.class_load.instances           = instances;
+  eventp->u.class_load.class_id            = (jobjectID)ik->java_mirror();
+}
+
+
+// List of classes unloaded for the duration of the CLASS_UNLOAD event
+// handler. Populated by save_class_unload_event_info(), queried by both
+// post_class_load_event() and post_class_unload_events(), and cleaned
+// up by post_class_unload_events().
+static GrowableArray<JVMPI_Event*>* unloaded_classes = NULL;
+
+// Note: kOop must be mirror
+void jvmpi::post_class_load_event(oop kOop, jint flag) {
+
+  if (flag == JVMPI_REQUESTED_EVENT && unloaded_classes != NULL) {
+    // This is a synthesized event and we are in the middle of unloading
+    // classes so see if the requested class is one that we unloaded.
+
+    // walk the list of unloaded class event information
+    for (int i = 0; i < unloaded_classes->length(); i++) {
+      JVMPI_Event *ev = unloaded_classes->at(i);
+      if ((oop)(ev->u.class_load.class_id) == kOop) {
+        // We are in the event handler for CLASS_UNLOAD event so
+        // we don't have to lock out GC. Post the saved event
+        // information for the unloaded class to the agent.
+        assert(GC_locker::is_active(), "GC must be locked when in event handler");
+        post_event_vm_mode(ev, NULL);
+        return;
+      }
+    }
+  }
+
+  ResourceMark rm;
+  JVMPI_Event event;
+  klassOop k = java_lang_Class::as_klassOop(kOop);
+
+  if (k->klass_part()->oop_is_array()) {
+    fillin_array_class_load_event(kOop, &event);
+  } else {
+    fillin_class_load_event(kOop, &event, true /* fillin JNI ids */);
+  }
+  event.event_type |= flag;
+  if (TraceJVMPI) {
+    tty->print("JVMPI: post_class_load_event for klass mirror " INTPTR_FORMAT " ", (address)kOop);
+    java_lang_Class::as_klassOop(kOop)->print_value();
+    tty->print(" ");
+    kOop->print_value();
+    tty->cr();
+  }
+
+  GC_locker::lock();
+  post_event_vm_mode(&event, NULL);
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_class_load_event(oop k) {
+  post_class_load_event(k, 0);
+}
+
+
+// Wrapper to translate the (32-bit) JVM/PI memory allocation function
+// to the HotSpot resource allocation function.
+void *jvmpi::jvmpi_alloc(unsigned int bytecnt) {
+  return (void *)resource_allocate_bytes((size_t)bytecnt);
+}
+
+
+void jvmpi::post_class_load_hook_event(unsigned char **ptrP,
+  unsigned char **end_ptrP, jvmpi_alloc_func_t malloc_f) {
+  JVMPI_Event event;
+
+  /* fill event info and notify the profiler */
+  event.event_type = JVMPI_EVENT_CLASS_LOAD_HOOK;
+
+  event.u.class_load_hook.class_data = *ptrP;
+  event.u.class_load_hook.class_data_len = *end_ptrP - *ptrP;
+  event.u.class_load_hook.malloc_f = malloc_f;
+
+  post_event(&event);
+    
+  *ptrP = event.u.class_load_hook.new_class_data;
+  *end_ptrP = *ptrP + event.u.class_load_hook.new_class_data_len;
+}
+
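+// Usage note (illustrative only): the contract used by the posting code
+// above is that the agent returns the (possibly instrumented) class bytes
+// in new_class_data/new_class_data_len, allocated with the supplied
+// malloc_f. A minimal pass-through handler in an agent could look like the
+// hypothetical sketch below.
+//
+//   void my_notify_event(JVMPI_Event* ev) {
+//     if (ev->event_type == JVMPI_EVENT_CLASS_LOAD_HOOK) {
+//       jint len = ev->u.class_load_hook.class_data_len;
+//       unsigned char* buf =
+//           (unsigned char*) ev->u.class_load_hook.malloc_f(len);
+//       memcpy(buf, ev->u.class_load_hook.class_data, len);  // or instrument here
+//       ev->u.class_load_hook.new_class_data     = buf;
+//       ev->u.class_load_hook.new_class_data_len = len;
+//     }
+//   }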
+
+// Post CLASS_UNLOAD events and/or release saved memory.
+void jvmpi::post_class_unload_events() {
+  if (unloaded_classes != NULL) {  // we unloaded some classes
+    // walk the list of unloaded class event information
+    for (int i = 0; i < unloaded_classes->length(); i++) {
+      JVMPI_Event *ev = unloaded_classes->at(i);  // shorthand for saved info
+
+      if (jvmpi::is_event_enabled(JVMPI_EVENT_CLASS_UNLOAD)) {
+        // The caller is still interested in the events so post them.
+        // Note: by the time we get called, the caller may no longer
+        // be interested in the events, but we have to always free
+        // the memory below.
+        JVMPI_Event event;
+
+        GC_locker::lock();
+
+        // construct a CLASS_UNLOAD event from the saved info
+        event.event_type = JVMPI_EVENT_CLASS_UNLOAD;
+        event.u.class_unload.class_id = ev->u.class_load.class_id;
+
+        post_event_vm_mode(&event, NULL);
+
+        GC_locker::unlock();
+      }
+      delete ev;  // done with the saved info
+    }
+
+    delete unloaded_classes;
+    unloaded_classes = NULL;
+  }
+}
+
+
+// GC has caused a class to be unloaded so save CLASS_LOAD information
+// just in case there is a RequestEvent(CLASS_LOAD) call from the
+// CLASS_UNLOAD event handler.
+void jvmpi::save_class_unload_event_info(oop k) {
+  JVMPI_Event *ev = new JVMPI_Event();
+  fillin_class_load_event(k, ev, false /* don't fillin JNI id values */);
+  ev->event_type |= JVMPI_REQUESTED_EVENT;
+
+  if (unloaded_classes == NULL) {
+    // first unloaded class so setup initial space for the events
+    unloaded_classes =
+      new (ResourceObj::C_HEAP) GrowableArray<JVMPI_Event*>(5, true);
+  }
+  unloaded_classes->append(ev);
+}
+
+
+void jvmpi::post_dump_event() {
+  if (is_event_enabled(JVMPI_EVENT_DUMP_DATA_REQUEST)) {
+    JVMPI_Event event;
+    event.event_type = JVMPI_EVENT_DUMP_DATA_REQUEST;
+    post_event(&event);
+  }
+  if (is_event_enabled(JVMPI_EVENT_RESET_DATA_REQUEST)) {
+    JVMPI_Event event;
+    event.event_type = JVMPI_EVENT_RESET_DATA_REQUEST;
+    post_event(&event);
+  }
+}
+
+// Maintain an array of skipped JNI global refs; these refs are not dumped
+// as GC roots in the heap dumps since they are internal to the VM.
+static GrowableArray<jobject>* skipped_globalrefs = NULL;
+
+void jvmpi::post_new_globalref_event(jobject ref, oop obj, bool post_jvmpi_event) {
+  if (post_jvmpi_event) {
+    // post new JNI global ref alloc event
+    JVMPI_Event event;
+    
+    GC_locker::lock();
+    
+    /* fill event info and notify the profiler */
+    event.event_type = JVMPI_EVENT_JNI_GLOBALREF_ALLOC;
+
+    event.u.jni_globalref_alloc.obj_id = (jobjectID)(obj);
+    event.u.jni_globalref_alloc.ref_id = ref;
+
+    post_event_vm_mode(&event, NULL);
+
+    GC_locker::unlock();
+
+  } else {
+    // We are not posting a JNI global ref alloc event; save the skipped
+    // JNI global ref so that it is not dumped as a GC root in the heap dump.
+
+    MutexLocker ml(JNIGlobalHandle_lock);
+    if (skipped_globalrefs == NULL) {
+      skipped_globalrefs = new (ResourceObj::C_HEAP) GrowableArray<jobject>(256, true);
+    }
+    skipped_globalrefs->append(ref);
+  }
+}
+
+
+void jvmpi::post_delete_globalref_event(jobject ref, bool post_jvmpi_event) {
+  if (post_jvmpi_event) {
+    // post JNI global ref free event
+    JVMPI_Event event;
+
+    GC_locker::lock();
+
+    /* fill event info and notify the profiler */
+    event.event_type = JVMPI_EVENT_JNI_GLOBALREF_FREE;
+
+    event.u.jni_globalref_free.ref_id = ref;
+
+    post_event_vm_mode(&event, NULL);
+
+    GC_locker::unlock();
+  } else {
+    // remove the JNI global ref from skipped_globalrefs list
+    MutexLocker ml(JNIGlobalHandle_lock);
+
+    int length = (skipped_globalrefs != NULL ? skipped_globalrefs->length() : 0);
+    int i = 0;
+
+    // we choose not to compact the array when a globalref is destroyed
+    // since the number of such calls might not be that many.
+    for (i = 0; i < length; i++) {
+      if (skipped_globalrefs->at(i) == ref) {
+        skipped_globalrefs->at_put(i, NULL);
+        break;
+      }
+    }
+    assert(length == 0 || i < length, "JNI global ref");
+  }
+}
+
+void jvmpi::post_new_weakref_event(jobject ref, oop obj) {
+  JVMPI_Event event;
+    
+  GC_locker::lock();
+
+  /* fill event info and notify the profiler */
+  event.event_type = JVMPI_EVENT_JNI_WEAK_GLOBALREF_ALLOC;
+
+  event.u.jni_globalref_alloc.obj_id = (jobjectID)(obj);
+  event.u.jni_globalref_alloc.ref_id = ref;
+
+  post_event_vm_mode(&event, NULL);
+
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_delete_weakref_event(jobject ref) {
+  JVMPI_Event event;
+    
+  GC_locker::lock();
+
+  /* fill event info and notify the profiler */
+  event.event_type = JVMPI_EVENT_JNI_WEAK_GLOBALREF_FREE;
+
+  event.u.jni_globalref_free.ref_id = ref;
+
+  post_event_vm_mode(&event, NULL);
+
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_arena_new_event(int arena_id, const char* arena_name) {
+  if (!is_event_enabled(JVMPI_EVENT_ARENA_NEW)) return;
+  JVMPI_Event event;
+
+  event.event_type = JVMPI_EVENT_ARENA_NEW;
+  event.u.new_arena.arena_id = arena_id;
+  event.u.new_arena.arena_name = arena_name;
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_arena_delete_event(int arena_id) {
+  JVMPI_Event event;
+
+  event.event_type = JVMPI_EVENT_DELETE_ARENA;
+  event.u.delete_arena.arena_id = arena_id;
+  post_event_vm_mode(&event, NULL);
+}
+
+/* post_object_alloc_event requires size to be in bytes */
+void jvmpi::post_object_alloc_event(oop obj, size_t bytesize, jint arena_id, jint flag) {
+  // do not emit the event if the allocation event is not enabled, unless it
+  // was specifically requested
+  if (!is_event_enabled(JVMPI_EVENT_OBJECT_ALLOC) && flag != JVMPI_REQUESTED_EVENT) return;
+  // bailout if obj is undefined
+  if (obj == NULL) return;
+  // bootstrapping problem: issue the object allocation event for the java/lang/Class
+  // mirror with its class set to NULL (to avoid infinite recursion).
+  bool bootstrap = (obj == Klass::cast(SystemDictionary::class_klass())->java_mirror());
+  // determine klass & is_array info
+  oop klass;
+  int is_array;
+  if (bootstrap) {
+    klass    = NULL;
+    is_array = JVMPI_NORMAL_OBJECT;
+  } else if (obj->is_instance()) {
+    klass    = Klass::cast(obj->klass())->java_mirror();
+    is_array = JVMPI_NORMAL_OBJECT;
+  } else if (obj->is_objArray()) {
+    klass    = Klass::cast(objArrayKlass::cast(obj->klass())->element_klass())->java_mirror();
+    is_array = JVMPI_CLASS;
+  } else if (obj->is_typeArray()) {
+    klass    = NULL;
+    is_array = typeArrayKlass::cast(obj->klass())->type();
+  } else {
+    klass    = JVMPI_INVALID_CLASS;
+    is_array = JVMPI_NORMAL_OBJECT;
+  }    
+  // post event if ok
+  if (klass != JVMPI_INVALID_CLASS) {
+    if (!flag) GC_locker::lock();
+    /* fill event info and notify the profiler */
+    { JVMPI_Event event;
+      event.event_type           = JVMPI_EVENT_OBJECT_ALLOC | flag;
+      event.u.obj_alloc.arena_id = arena_id;
+      event.u.obj_alloc.class_id = (jobjectID)klass;
+      event.u.obj_alloc.is_array = is_array;
+      event.u.obj_alloc.size     = (int) bytesize; // spec will require 64 bit modifications
+      event.u.obj_alloc.obj_id   = (jobjectID)obj;
+      if (TraceJVMPI) {
+	tty->print_cr("JVMPI: post_object_alloc_event for object " INTPTR_FORMAT " ", (address)obj);
+      }
+      post_event_vm_mode(&event, NULL);
+    }
+    if (!flag) GC_locker::unlock();
+  }
+}
+
+
+void jvmpi::post_object_free_event(oop obj) {
+  JVMPI_Event event;
+
+  // $$$ There used to be an assertion that this was only happening during
+  // m/s collections.  Didn't know how to generalize, so I took it out.
+  // (DLD, 6/20).
+
+  /* fill event info and notify the profiler */
+  event.event_type = JVMPI_EVENT_OBJECT_FREE;
+
+  event.u.obj_free.obj_id = (jobjectID)obj;
+    
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_object_move_event(oop oldobj, int old_arena_id, oop newobj, int new_arena_id) {
+  JVMPI_Event event;
+    
+  assert(Universe::heap()->is_gc_active(), "Should only move objects during GC");
+
+  /* fill event info and notify the profiler */
+  event.event_type = JVMPI_EVENT_OBJECT_MOVE;
+
+  event.u.obj_move.obj_id       = (jobjectID)oldobj;
+  event.u.obj_move.arena_id     = old_arena_id;
+  event.u.obj_move.new_obj_id   = (jobjectID)newobj;
+  event.u.obj_move.new_arena_id = new_arena_id;
+
+  post_event_vm_mode(&event, NULL);
+}
+
+
+static jint level = 0;
+
+void jvmpi::post_method_entry2_event(methodOop m, oop receiver) {
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_METHOD_ENTRY2;
+  event.u.method_entry2.method_id = m->jmethod_id();
+  event.u.method_entry2.obj_id = (jobjectID)receiver;
+  if (TraceJVMPI) {
+#if 0
+    ResourceMark rm;
+    tty->print_cr("%04d %s: method_entry2 %s",
+		  level++,
+		  ((JavaThread*)get_thread())->get_thread_name(),
+		  m->name_and_sig_as_C_string());
+#endif
+  }
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_method_entry_event(methodOop m) {
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_METHOD_ENTRY;
+  event.u.method.method_id = m->jmethod_id();
+  if (TraceJVMPI) {
+#if 0
+    ResourceMark rm;
+    tty->print_cr("%04d %s: method_entry %s",
+		  level++,
+		  ((JavaThread*)get_thread())->get_thread_name(),
+		  m->name_and_sig_as_C_string());
+#endif
+  }
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_method_exit_event(methodOop m) {
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_METHOD_EXIT;
+  event.u.method.method_id = m->jmethod_id();
+  if (TraceJVMPI) {
+#if 0
+    ResourceMark rm;
+    tty->print_cr("%04d %s: method_exit  %s",
+		  --level,
+		  ((JavaThread*)get_thread())->get_thread_name(),
+		  m->name_and_sig_as_C_string());
+#endif
+  }
+  post_event_vm_mode(&event, NULL);
+}
+
+
+// use  compiled_method_t so that the line number table can be constructed only
+// temporarily and then released after post_compiled_method_load_event terminates
+void jvmpi::post_compiled_method_load_event(compiled_method_t *compiled_method_info) {
+  JVMPI_Event event;
+
+  event.event_type = JVMPI_EVENT_COMPILED_METHOD_LOAD;
+  event.u.compiled_method_load.method_id         = compiled_method_info->method->jmethod_id();
+  event.u.compiled_method_load.code_addr         = compiled_method_info->code_addr;
+  event.u.compiled_method_load.code_size         = compiled_method_info->code_size;
+  event.u.compiled_method_load.lineno_table_size = compiled_method_info->lineno_table_len;
+  event.u.compiled_method_load.lineno_table      = compiled_method_info->lineno_table;
+
+  post_event_vm_mode(&event, NULL);
+}
+
+void jvmpi::post_compiled_method_unload_event(methodOop method) {
+  JVMPI_Event event;
+
+  event.event_type = JVMPI_EVENT_COMPILED_METHOD_UNLOAD;
+  event.u.compiled_method_unload.method_id = method->jmethod_id();
+  post_event_vm_mode(&event, NULL);
+}
+
+void jvmpi::post_monitor_contended_enter_event(void* object) {
+  GC_locker::lock();
+
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_MONITOR_CONTENDED_ENTER;
+  event.u.monitor.object = (jobjectID)object;
+  post_event_vm_mode(&event, NULL);
+
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_monitor_contended_entered_event(void* object) {
+  GC_locker::lock();
+
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_MONITOR_CONTENDED_ENTERED;
+  event.u.monitor.object = (jobjectID)object;
+  post_event_vm_mode(&event, NULL);
+
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_monitor_contended_exit_event(void* object) {
+  GC_locker::lock();
+
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_MONITOR_CONTENDED_EXIT;
+  event.u.monitor.object = (jobjectID)object;
+  post_event_vm_mode(&event, NULL);
+
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_monitor_wait_event(oop obj, jlong millis) {
+  GC_locker::lock();
+
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_MONITOR_WAIT;
+  event.u.monitor_wait.object  = (jobjectID)obj;
+  event.u.monitor_wait.timeout = millis;
+  post_event_vm_mode(&event, NULL);
+
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_monitor_waited_event(oop obj, jlong millis) {
+  GC_locker::lock();
+
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_MONITOR_WAITED;
+  event.u.monitor_wait.object  = (jobjectID)obj;
+  event.u.monitor_wait.timeout = millis;
+  post_event_vm_mode(&event, NULL);
+
+  GC_locker::unlock();
+}
+
+
+void jvmpi::post_raw_monitor_contended_enter_event(RawMonitor* rmon) {
+  Thread* tp = Thread::current();
+  if (tp->is_VM_thread()) return;
+  JVMPI_Event event;
+
+  event.event_type = JVMPI_EVENT_RAW_MONITOR_CONTENDED_ENTER;
+  event.u.raw_monitor.name = rmon->name();
+  event.u.raw_monitor.id = (JVMPI_RawMonitor)rmon;
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_raw_monitor_contended_entered_event(RawMonitor* rmon) {
+  if (Thread::current()->is_VM_thread()) return;
+  JVMPI_Event event;
+
+  event.event_type = JVMPI_EVENT_RAW_MONITOR_CONTENDED_ENTERED;
+  event.u.raw_monitor.name = rmon->name();
+  event.u.raw_monitor.id = (JVMPI_RawMonitor)rmon;
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_raw_monitor_contended_exit_event(RawMonitor* rmon) {
+  if (Thread::current()->is_VM_thread()) return;
+  JVMPI_Event event;
+
+  event.event_type = JVMPI_EVENT_RAW_MONITOR_CONTENDED_EXIT;
+  event.u.raw_monitor.name = rmon->name();
+  event.u.raw_monitor.id = (JVMPI_RawMonitor)rmon;
+  post_event_vm_mode(&event, NULL);
+}
+
+
+void jvmpi::post_gc_start_event() {
+  JVMPI_Event event;
+  assert(Thread::current()->is_VM_thread(), "wrong thread");
+
+  Thread* calling_thread = JavaThread::active();
+  /* fill event info and notify the profiler */
+  event.event_type = JVMPI_EVENT_GC_START;
+
+  assert(calling_thread->is_Java_thread(), "wrong thread");
+  post_event_vm_mode(&event, (JavaThread*)calling_thread);
+}
+
+
+class CountObjects: public ObjectClosure {
+ private:
+  int _nof_objects;
+ public:
+  CountObjects(): _nof_objects(0) {}
+
+  void do_object(oop obj) { _nof_objects++;  };
+
+  int nof_objects() const { return _nof_objects; }
+};
+
+
+void jvmpi::post_gc_finish_event(jlong used_obj_space, jlong total_obj_space) {
+  JVMPI_Event event;
+  assert(Thread::current()->is_VM_thread(), "wrong thread");
+  jlong used_objs = 0;
+  // compute number of used objects
+  { // Note: this is slow and cumbersome
+    CountObjects blk;
+    // Although the call to ensure_parseability() is not needed here,
+    // because this code runs at the end of GC, it is kept commented
+    // out below since this code has moved around in the past.
+    //  Universe::heap()->ensure_parseability();
+    Universe::heap()->permanent_object_iterate(&blk);
+
+    Universe::heap()->object_iterate(&blk);
+    used_objs = blk.nof_objects();
+  }
+  Thread* calling_thread = JavaThread::active();
+  /* fill event info and notify the profiler */
+  event.event_type = JVMPI_EVENT_GC_FINISH;
+  event.u.gc_info.used_objects       = used_objs;
+  event.u.gc_info.used_object_space  = used_obj_space;
+  event.u.gc_info.total_object_space = total_obj_space;
+
+  assert(calling_thread->is_Java_thread(), "wrong thread");
+  post_event_vm_mode(&event, (JavaThread*)calling_thread);
+}
+
+
+void jvmpi::post_trace_instr_event(unsigned char *pc, unsigned char opcode) {
+  Unimplemented();
+}
+
+
+void jvmpi::post_trace_if_event(unsigned char *pc, int is_true) {
+  Unimplemented();
+}
+
+
+void jvmpi::post_trace_tableswitch_event(unsigned char *pc, int key, int low, int hi) {
+  Unimplemented();
+}
+
+
+void jvmpi::post_trace_lookupswitch_event(unsigned char *pc, int chosen_pair_index, int pairs_total) {
+  Unimplemented();
+}
+
+
+// heap dumps
+
+// Dump is a helper class for all kinds of dumps that require
+// a buffer to hold the dump. 
+
+class Dump: public StackObj {
+ private:
+  address _begin;                      // the beginning of the dump space, NULL if no space was allocated
+  address _end;                        // the current dump position
+  address _limit;                      // the limit of the dump space (debugging only)
+
+  void init(int dump_size) {
+    assert(dump_size <= 0 || _begin == NULL, "dump buffer already allocated");
+    _begin = dump_size > 0 ? NEW_C_HEAP_ARRAY(unsigned char, dump_size) : NULL;
+    _end   = _begin;
+    _limit = _begin + dump_size;
+  }
+    
+  bool write() const                   { return begin() != NULL; }
+  address limit() const                { return _limit; }
+  void range_check(int size)           { assert(end() + size <= limit(), "end of dump reached"); }
+
+ public:
+  // creation
+  Dump()                               { init(0); }
+  void enable_write(int dump_size)     { init(dump_size); }
+  ~Dump()                              { if (write()) FREE_C_HEAP_ARRAY(unsigned char, begin()); }
+
+  // accessors
+  address begin() const                { return _begin; }
+  address end() const                  { return _end; }
+  int size() const                     { return end() - begin(); }
+
+  // primitive dumpers
+  void dump_u1(u1 x)                   { if (write()) { range_check(1); *_end = x;                   } _end += 1; }
+  void dump_u2(u2 x)                   { if (write()) { range_check(2); Bytes::put_Java_u2(_end, x); } _end += 2; }
+  void dump_u4(u4 x)                   { if (write()) { range_check(4); Bytes::put_Java_u4(_end, x); } _end += 4; }
+  void dump_u8(u8 x)                   { if (write()) { range_check(8); Bytes::put_Java_u8(_end, x); } _end += 8; }
+
+  // basic type dumpers
+  void dump_bool  (jboolean* x)        { dump_u1(*(u1*)x); }
+  void dump_char  (jchar*    x)        { dump_u2(*(u2*)x); }
+  void dump_float (jfloat*   x)        { dump_u4(*(u4*)x); }
+  void dump_double(jdouble*  x)        { dump_u8(*(u8*)x); }
+  void dump_byte  (jbyte*    x)        { dump_u1(*(u1*)x); }
+  void dump_short (jshort*   x)        { dump_u2(*(u2*)x); }
+  void dump_int   (jint*     x)        { dump_u4(*(u4*)x); }
+  void dump_long  (jlong*    x)        { dump_u8(*(u8*)x); }
+  void dump_obj   (oop*      x)        { dump_oop(*x); }
+
+  // other dumpers
+  //
+  // Note: jobjectID (oops) and JNIEnv* are not dumped in Java byte ordering
+  //       like all other data types - which is an inconsistency. It should
+  //       really be handled like all other data (and mapped to u4 for the
+  //       ia32 architecture).
+  void dump_oop(oop obj) {
+    if (obj != NULL && obj->is_klass()) {
+      // There are some objects, e.g., "unsafe" static field accessors,
+      // that can have a direct reference to an instanceKlass and we
+      // don't want to expose an internal data structure via a heap dump.
+      // Most places with 'if (obj->is_klass())' checks just return, but
+      // returning from here would confuse callers that make assumptions
+      // about the dump size, and that can cause crashes.
+      // We just dump NULL instead.
+      obj = NULL;
+    }
+    assert(obj == NULL || obj->is_oop(), "not an oop");
+#ifndef _LP64
+    if (write()) {
+      range_check(4);
+      Bytes::put_native_u4(_end, (u4)obj);
+    }
+    _end += 4;
+#else
+    if (write()) {
+      range_check(8);
+      Bytes::put_native_u8(_end, (u8)obj);
+    }
+    _end += 8;
+#endif
+  }
+
+#ifndef _LP64
+  void dump_thread(JNIEnv* env)        { if (write()) { range_check(4); Bytes::put_native_u4(_end, (u4)env); } _end += 4; }
+  void dump_rawmonitor(JVMPI_RawMonitor mon) { if (write()) { range_check(4); Bytes::put_native_u4(_end, (u4)mon); } _end += 4; }
+  void dump_char_array(const char* s)        { if (write()) { range_check(4); Bytes::put_native_u4(_end, (u4)s); } _end += 4; }
+  void dump_voids(void* x)             { dump_u4((u4)x); }
+#else
+  void dump_thread(JNIEnv* env)        { if (write()) { range_check(8); Bytes::put_native_u8(_end, (u8)env); } _end += 8; }
+  void dump_rawmonitor(JVMPI_RawMonitor mon) { if (write()) { range_check(8); Bytes::put_native_u8(_end, (u8)mon); } _end += 8; }
+  void dump_char_array(const char* s)        { if (write()) { range_check(8); Bytes::put_native_u8(_end, (u8)s); } _end += 8; }
+  void dump_voids(void* x)             { dump_u8((u8)x); }
+#endif
+  void dump_type (int type)            { dump_u1((u1)type); }
+
+  // patching
+  void patch_u2(address at, u2 x) {
+    if (write()) {
+      assert(begin() <= at && at + 2 <= limit(), "patching outside dump space");
+      Bytes::put_Java_u2(at, x);
+    }
+  }
+
+  void patch_u4(address at, u4 x) {
+    if (write()) {
+      assert(begin() <= at && at + 4 <= limit(), "patching outside dump space");
+      Bytes::put_Java_u4(at, x);
+    }
+  }
+};
+
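+// Usage note (an inferred sketch of the intended two-pass pattern, not a
+// verbatim copy of the callers below): the primitive dumpers advance _end
+// even when no buffer has been allocated, so a dump can first be measured
+// and then written into a buffer of exactly the right size:
+//
+//   Dump dump;                        // pass 1: no buffer, only counts bytes
+//   JvmpiHeapDumper(&dump, level);    // walk the heap; dump.size() grows
+//   dump.enable_write(dump.size());   // allocate the buffer, reset position
+//   JvmpiHeapDumper(&dump, level);    // pass 2: same walk, now writing
+//   // dump.begin() / dump.size() now describe the finished dump buffer
+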
+
+class FieldDumper: public SignatureIterator {
+ private:
+  Dump*   _dump;
+  address _addr;
+  bool    _dump_basic_types;
+
+ public:
+  FieldDumper(Dump* dump, int level, symbolHandle signature, address addr)
+  : SignatureIterator(signature)
+  , _dump(dump)
+  , _addr(addr)
+  { 
+    _dump_basic_types = (level == JVMPI_DUMP_LEVEL_2);
+    dispatch_field();
+  }
+
+  void do_bool  ()                     { if (_dump_basic_types) _dump->dump_bool  ((jboolean*)_addr); }
+  void do_char  ()                     { if (_dump_basic_types) _dump->dump_char  ((jchar*   )_addr); }
+  void do_float ()                     { if (_dump_basic_types) _dump->dump_float ((jfloat*  )_addr); }
+  void do_double()                     { if (_dump_basic_types) _dump->dump_double((jdouble* )_addr); }
+  void do_byte  ()                     { if (_dump_basic_types) _dump->dump_byte  ((jbyte*   )_addr); }
+  void do_short ()                     { if (_dump_basic_types) _dump->dump_short ((jshort*  )_addr); }
+  void do_int   ()                     { if (_dump_basic_types) _dump->dump_int   ((jint*    )_addr); }
+  void do_long  ()                     { if (_dump_basic_types) _dump->dump_long  ((jlong*   )_addr); }
+  void do_void  ()                     { ShouldNotReachHere();                                        }
+  void do_object(int begin, int end)   {                        _dump->dump_obj   ((oop*     )_addr); }
+  void do_array (int begin, int end)   {                        _dump->dump_obj   ((oop*     )_addr); }
+};
+
+
+// The ObjectDumper takes care of any heap object to be dumped.
+// Note that non java-level objects are filtered out (such as
+// klasses, methodOops, etc.) and that mirrors are converted
+// into klasses for the dump.
+
+class ObjectDumper: public StackObj {
+ private:
+  Dump* _dump;
+  int   _level;
+
+  void dump_instance(instanceOop instance) {
+    if (_level == JVMPI_DUMP_LEVEL_0) {
+      // dump type and id only
+      _dump->dump_type(JVMPI_NORMAL_OBJECT);
+      _dump->dump_oop(instance);
+      return;
+    }
+    // dump header
+    _dump->dump_type(JVMPI_GC_INSTANCE_DUMP);
+    _dump->dump_oop(instance);
+    _dump->dump_oop(Klass::cast(instance->klass())->java_mirror());
+    _dump->dump_u4((u4)0);              // reserve space for no. of bytes - patched at the end
+    address field_start = _dump->end(); // remember start of field dump
+    // dump instance fields
+    // (note: dumping in reverse order since the klass load event dumps
+    //        the instance field description in reverse order as well.)
+    { for (FieldStream s(instanceKlassHandle(instance->klass()), false, true); !s.eos(); s.next()) {
+        // ignore static fields as they are not in the instance
+        if (!s.access_flags().is_static()) {
+          FieldDumper(_dump, _level, s.signature(), (address)instance + s.offset());
+        }
+      }
+    }
+    // patch no. of bytes
+    _dump->patch_u4(field_start - 4, _dump->end() - field_start);
+  }
+
+  void dump_obj_array(objArrayOop array) {
+    // Note: Do not dump system object arrays as they are meaningless
+    //       for hprof. Furthermore, they contain klasses which should
+    //       never go out w/o extra treatment.
+    if (array->klass() != Universe::systemObjArrayKlassObj()) {
+      if (_level == JVMPI_DUMP_LEVEL_0) {
+        // dump type and id only
+        _dump->dump_type(JVMPI_CLASS);
+        _dump->dump_oop(array);
+        return;
+      }
+      oop klass = Klass::cast(objArrayKlass::cast(array->klass())->element_klass())->java_mirror();
+      const int length = array->length();
+      // dump header
+      _dump->dump_type(JVMPI_GC_OBJ_ARRAY_DUMP);
+      _dump->dump_oop(array);
+      _dump->dump_u4(length);
+      _dump->dump_oop(klass);
+      // dump elements
+      for (int i = 0; i < length; i++) _dump->dump_oop(array->obj_at(i));
+      // debugging
+      if (TraceJVMPI) {
+        tty->print("JVMPI: dump @ " INTPTR_FORMAT " obj array [%d] (klass = " INTPTR_FORMAT ")", (address)array, length, (address)klass);
+        if (Verbose) {
+          tty->print(" {");
+          for (int i = 0; i < length; i++) {
+            if (i > 0) tty->print(", ");
+            tty->print(INTPTR_FORMAT, (address)array->obj_at(i));
+          }
+          tty->print("}");
+        }
+        tty->cr();
+      }
+    }
+  }
+
+  void dump_type_array(typeArrayOop array) {
+    const int length = array->length();
+    const BasicType type = typeArrayKlass::cast(array->klass())->type();
+    int jvmpi_type = -1;
+    switch (type) {
+      case T_BOOLEAN: jvmpi_type = JVMPI_BOOLEAN; break;
+      case T_CHAR   : jvmpi_type = JVMPI_CHAR   ; break;
+      case T_FLOAT  : jvmpi_type = JVMPI_FLOAT  ; break;
+      case T_DOUBLE : jvmpi_type = JVMPI_DOUBLE ; break;
+      case T_BYTE   : jvmpi_type = JVMPI_BYTE   ; break;
+      case T_SHORT  : jvmpi_type = JVMPI_SHORT  ; break;
+      case T_INT    : jvmpi_type = JVMPI_INT    ; break;
+      case T_LONG   : jvmpi_type = JVMPI_LONG   ; break;
+      default       : ShouldNotReachHere();
+    }
+    if (_level == JVMPI_DUMP_LEVEL_0) {
+      // dump type and id only
+      _dump->dump_type(jvmpi_type);
+      _dump->dump_oop(array);
+      return;
+    }
+    // dump header
+    _dump->dump_type(JVMPI_GC_PRIM_ARRAY_DUMP);
+    _dump->dump_oop(array);
+    _dump->dump_u4(length);
+    _dump->dump_type(jvmpi_type);
+    // dump elements
+    if (_level == JVMPI_DUMP_LEVEL_2) {
+      switch (type) {
+        case T_BOOLEAN: { for (int i = 0; i < length; i++) _dump->dump_bool  (array->bool_at_addr  (i)); } break;
+        case T_CHAR   : { for (int i = 0; i < length; i++) _dump->dump_char  (array->char_at_addr  (i)); } break;
+        case T_FLOAT  : { for (int i = 0; i < length; i++) _dump->dump_float (array->float_at_addr (i)); } break;
+        case T_DOUBLE : { for (int i = 0; i < length; i++) _dump->dump_double(array->double_at_addr(i)); } break;
+        case T_BYTE   : { for (int i = 0; i < length; i++) _dump->dump_byte  (array->byte_at_addr  (i)); } break;
+        case T_SHORT  : { for (int i = 0; i < length; i++) _dump->dump_short (array->short_at_addr (i)); } break;
+        case T_INT    : { for (int i = 0; i < length; i++) _dump->dump_int   (array->int_at_addr   (i)); } break;
+        case T_LONG   : { for (int i = 0; i < length; i++) _dump->dump_long  (array->long_at_addr  (i)); } break;
+        default       : ShouldNotReachHere();
+      }
+    }
+    // debugging
+    if (TraceJVMPI) {
+      tty->print_cr("JVMPI: dump @ " INTPTR_FORMAT " prim array [%d] (type = %d)", (address)array, length, type);
+    }
+  }
+
+  void dump_klass(klassOop klass) {
+    if (Klass::cast(klass)->oop_is_instance()) {
+      instanceKlass* k = instanceKlass::cast(klass);
+      // Check for level 0 dump
+      if (_level == JVMPI_DUMP_LEVEL_0) {
+        // dump type and id only
+        _dump->dump_type(JVMPI_NORMAL_OBJECT);    // Is this right?
+        _dump->dump_oop(k->java_mirror());
+        return;
+      }
+      // dump header
+      _dump->dump_type(JVMPI_GC_CLASS_DUMP);
+      _dump->dump_oop(k->java_mirror());
+      _dump->dump_oop(k->super() == NULL ? (oop)NULL : Klass::cast(k->super())->java_mirror());
+      _dump->dump_oop(k->class_loader());
+      _dump->dump_oop(k->signers());
+      _dump->dump_oop(k->protection_domain());
+      _dump->dump_oop(StringTable::lookup(k->name())); // NULL if not interned string
+      _dump->dump_voids(NULL); // reserved
+      _dump->dump_u4(k->size_helper() * BytesPerWord);
+      // dump interfaces
+      { objArrayOop interfaces = k->local_interfaces();
+        for (int i = 0; i < interfaces->length(); i++) {
+          oop interf = Klass::cast((klassOop)interfaces->obj_at(i))->java_mirror();
+          _dump->dump_oop(interf);
+        }
+      }
+      // dump constant pool
+      { address size_loc = _dump->end();    // remember constant pool size location for later patching  
+        _dump->dump_u2((u2)0);              // reserve space for constant pool size - patched at the end
+        int size = 0;
+        const constantPoolOop pool = k->constants();
+        for (int i = 1; i < pool->length(); i++) { // index i = 0 is unused!
+          address end = _dump->end();
+          // for now we ignore all entries
+          // eventually we should probably
+          // dump at least the oop entries
+          /*
+          switch (pool->tag_at(i).value()) {
+            case JVM_CONSTANT_Class:
+            case JVM_CONSTANT_Fieldref:
+            ...
+          }
+          */
+          if (end != _dump->end()) size++; // count individual entries
+        }
+        // patch number of entries
+        _dump->patch_u2(size_loc, size);
+      }
+      // dump static fields
+      // (note: dumping in reverse order since the klass load event dumps
+      //        the static field description in reverse order as well.)
+      {
+         instanceKlassHandle kh(klass);
+         FieldStream s(kh, true, true);
+         for (; !s.eos(); s.next()) { 
+           // ignore instance fields as they are not in the klass
+           if (s.access_flags().is_static()) {
+             FieldDumper(_dump, _level, s.signature(), (address)klass + s.offset());
+           }
+         }
+      }
+    } else if (Klass::cast(klass)->oop_is_objArray()) {
+      objArrayKlass* k = objArrayKlass::cast(klass);
+      // Check for level 0 dump
+      if (_level == JVMPI_DUMP_LEVEL_0) {
+        // dump type and id only
+        _dump->dump_type(JVMPI_NORMAL_OBJECT);    // Is this right?
+        _dump->dump_oop(k->java_mirror());
+        return;
+      }
+      // still missing
+    }
+  }
+
+ public:
+  ObjectDumper(Dump* dump, int level, oop obj) : _dump(dump), _level(level) {
+    // filter out all klasses
+    if (obj->is_klass()) return;
+    // convert mirrors
+    if (obj->klass() == SystemDictionary::class_klass()) {
+      // obj is a mirror - convert into corresponding class if possible
+      if (!java_lang_Class::is_primitive(obj)) {
+        // obj is not a mirror for a primitive class (basic type)
+        // get the corresponding class for dumping
+        obj = java_lang_Class::as_klassOop(obj);
+        assert(obj != NULL, "class for non-primitive mirror must exist");
+      } else {
+        // obj is a mirror for a primitive class (basic type)
+        // for which we don't have a (VM-level) class => dump
+        // mirror as it is.
+      }
+    }
+    // dump object
+           if (obj->is_instance ()) { dump_instance  ((instanceOop )obj);
+    } else if (obj->is_objArray ()) { dump_obj_array ((objArrayOop )obj);
+    } else if (obj->is_typeArray()) { dump_type_array((typeArrayOop)obj);
+    } else if (obj->is_klass    ()) { dump_klass     ((klassOop    )obj);
+    }
+  }
+};
+
+
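+// Iterates over all heap objects at a safepoint and dumps each one via an
+// ObjectDumper at the requested dump level.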
+class JvmpiHeapDumper: public ObjectClosure {
+ private:
+  Dump* _dump;
+  int   _level;
+
+ public:
+  JvmpiHeapDumper(Dump* dump, int level) : _dump(dump), _level(level) {
+    assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+    // Ensure the heap's parseable before iterating over it
+    Universe::heap()->ensure_parseability();
+    Universe::heap()->object_iterate(this);
+  }
+
+  void do_object(oop obj)              { ObjectDumper(_dump, _level, obj); }
+};
+
+
+// Move this into the machine-specific part!
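+// Dumps a single JVMPI_MONITOR_JAVA record for one ObjectMonitor: the locked
+// object, the owning thread, and the threads contending for or waiting on it.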
+class MonitorDumper: public StackObj {
+ private:
+  Dump* _dump;
+
+  void dump_for_thread(ObjectMonitor* mid, JavaThread* thread) {
+    assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+    ResourceMark rm;
+    // klassOops may be locked (e.g. due to class initialization). Make sure to skip them.
+    if (((oop)mid->object())->is_klass()) return;
+    //
+    // Solaris implements mid->count() differently than Win32 or Linux so
+    // we had to create and use the OS specific contentions() function.
+    //
+    int n_want_lock = mid->contentions();     // number of threads contending for the monitor
+    int n_waiters = mid->waiters();
+    // this is an unused monitor so skip it
+    if (thread == NULL && n_want_lock == 0 && n_waiters == 0) return;
+    // dump header
+    _dump->dump_type(JVMPI_MONITOR_JAVA);
+    _dump->dump_oop((oop)mid->object());
+    _dump->dump_thread(thread == NULL ? NULL : thread->jni_environment());
+    _dump->dump_u4(n_want_lock + n_waiters); // entry count
+    _dump->dump_u4(n_want_lock); // number of threads waiting to enter
+    if (n_want_lock > 0) {
+      GrowableArray<JavaThread*>* want_list = Threads::get_pending_threads(
+	n_want_lock, (address)mid, false /* no locking needed */);
+      for (int i = 0; i < n_want_lock; i++) {
+        if (i < want_list->length()) {
+          JavaThread* jt = want_list->at(i);
+          _dump->dump_thread(jt->jni_environment());
+        } else {
+          _dump->dump_thread(NULL);
+        }
+      }
+    }
+    _dump->dump_u4(n_waiters); // number of threads waiting to be notified
+    if (n_waiters > 0) {
+      ObjectWaiter* waiter = mid->first_waiter();
+      for (int i = 0; i < n_waiters; i++) {
+        // assert(waiter != NULL, "wrong number of waiters");
+        // No guarantee this value doesn't change while dumping.
+        if (waiter != NULL) {
+          Thread* thd = mid->thread_of_waiter(waiter);
+          if (thd->is_Java_thread()) {
+            _dump->dump_thread(((JavaThread*)thd)->jni_environment());
+          } else {
+            _dump->dump_thread(NULL);
+          }
+          waiter = mid->next_waiter(waiter);
+        } else {
+          _dump->dump_thread(NULL);
+        }
+      }
+    }
+  }
+
+ public:
+  MonitorDumper(Dump* dump, ObjectMonitor* mid): _dump(dump) {
+    // dump Java lock
+    dump_for_thread(mid, Threads::owning_thread_from_monitor_owner(
+      (address)mid->owner(), false /* no locking needed */));
+  }
+};
+
+
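+// Dumps all Java monitors: first the monitors in the monitor cache, then (when
+// heavyweight monitors are not forced) the heavyweight monitors fabricated for
+// objects locked with lightweight locks.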
+class JavaMonitorDumper: public MonitorClosure {
+ private:
+  Dump* _dump;
+
+ public:
+  JavaMonitorDumper(Dump* dump) : _dump(dump) {
+    assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+    ObjectSynchronizer::monitors_iterate(this);   // first dump the monitor cache
+    if (!UseHeavyMonitors) {	// now dump any lightweight monitors
+      ResourceMark rm;
+      GrowableArray<ObjectMonitor*>* fab_list = Threads::jvmpi_fab_heavy_monitors();
+      for (int i = 0; i < fab_list->length(); i++) {
+        ObjectMonitor* fab = fab_list->at(i);
+        assert(fab != NULL, "Expected fabricated heavyweight monitor");
+        MonitorDumper(_dump, fab);
+        // ObjectMonitor is a CHeap object, so remember to free it
+        delete fab;
+      }
+    }
+  }
+  void do_monitor(ObjectMonitor* mid)  { MonitorDumper(_dump, mid); }
+};
+
+
+class RawMonitorDumper: public StackObj {
+ private:
+  Dump* _dump;
+ public:
+  RawMonitorDumper(Dump* dump) : _dump(dump) {
+    for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
+      dump_rawmonitors_for(thread);
+    }
+  }
+
+  void dump_rawmonitors_for(JavaThread* thread) {
+    char* no_name = NULL;
+    for (RawMonitor* mon = thread->rawmonitor_list(); mon; mon = mon->next_raw()) {
+      assert((PROF_RM_CHECK(mon)), "invalid raw monitor");
+      _dump->dump_type(JVMPI_MONITOR_RAW);
+      _dump->dump_char_array(mon->name());
+      _dump->dump_rawmonitor((JVMPI_RawMonitor) mon);
+      _dump->dump_thread(thread->jni_environment());
+      dump_monitor_info(mon, thread);
+    }
+  }
+
+  void dump_monitor_info(RawMonitor* mid, JavaThread* thread) {
+    //
+    // Solaris implements mid->count() differently than Win32 or Linux so
+    // we had to create and use the OS specific contentions() function.
+    //
+    int n_want_lock = mid->contentions();     // number of threads contending for the monitor
+    int n_waiters = mid->waiters();
+    // this is an unused monitor so skip it
+    if (thread == NULL && n_want_lock == 0 && n_waiters == 0) return;
+    // dump header
+    _dump->dump_u4(n_want_lock + n_waiters); // entry count
+    _dump->dump_u4(n_want_lock); // number of threads waiting to enter
+    if (n_want_lock > 0) {
+      GrowableArray<JavaThread*>* want_list = Threads::get_pending_threads(
+	n_want_lock, (address)mid, false /* no locking needed */);
+      for (int i = 0; i < n_want_lock; i++) {
+        if (i < want_list->length()) {
+          JavaThread* jt = want_list->at(i);
+          _dump->dump_thread(jt->jni_environment());
+        } else {
+          _dump->dump_thread(NULL);
+        }
+      }
+    }
+    _dump->dump_u4(n_waiters); // number of threads waiting to be notified
+    if (n_waiters > 0) {
+      ObjectWaiter* waiter = mid->first_waiter();
+      for (int i = 0; i < n_waiters; i++) {
+          // assert(waiter != NULL, "wrong number of waiters");
+          // no guarantee that this is not changing dynamically
+          if (waiter != NULL) {
+            Thread* thd = mid->thread_of_waiter(waiter);
+            if (thd->is_Java_thread()) {
+              _dump->dump_thread(((JavaThread*)thd)->jni_environment());
+            } else {
+              _dump->dump_thread(NULL);
+            }
+            waiter = mid->next_waiter(waiter);
+          } else {
+            _dump->dump_thread(NULL);
+          }
+      }
+    }
+  }
+};
+
+// JVMPI GC Root Collection support
+
+class HeapDumpInfoCollector;
+class RootElementForThread;
+class RootElementForFrame;
+
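+// OopClosure that classifies each root oop it visits by the context in which it
+// was reached (JNI handle, stack frame, system class, thread block, monitor) and
+// hands it to the HeapDumpInfoCollector.  begin_iterate()/end_iterate() push and
+// pop the current root type so that nested traversals are attributed correctly.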
+class CollectRootOopsClosure : public OopClosure {
+public:
+  enum RootType {
+    _unknown,
+    _jni_handle,
+    _stack_frame,
+    _system_class,
+    _thread_block,
+    _monitor_used
+  };
+
+ private:
+  JavaThread* _thread;
+  intptr_t*   _frame_id;
+  bool        _is_native_frame;
+  bool        _is_entry_frame;
+  GrowableArray<RootType>* typesStack;
+  HeapDumpInfoCollector*   _rc;
+
+ public:
+  CollectRootOopsClosure(HeapDumpInfoCollector *rc) {
+    _rc = rc;
+    typesStack = new (ResourceObj::C_HEAP) GrowableArray<RootType>(5, true);
+    // Support nested begin_iterate and end_iterate calls
+    typesStack->push(_unknown);
+  }
+  ~CollectRootOopsClosure() {
+    assert(typesStack->length() == 1, "All types should be popped");
+    delete typesStack;
+  }
+  void set_thread(JavaThread* thread) { 
+    _thread = thread;
+    _frame_id = NULL;
+    _is_native_frame = false;
+    _is_entry_frame = false;
+  }
+  void set_frame_type(bool is_native, bool is_entry) {
+    _is_native_frame = is_native;
+    _is_entry_frame = is_entry;
+  }
+  void set_frame_id(intptr_t* id) {
+    _frame_id = id; 
+  }
+  void begin_iterate(RootType type) { typesStack->push(type); }
+  void end_iterate(RootType type) { 
+    RootType t = typesStack->pop();
+    assert(t == type, "type doesn't match");
+  }
+  void do_oop(oop* obj_p);
+};
+
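+// Collects the JVMPI_CallTrace and JVMPI_CallFrame records for all threads in
+// C-heap arrays: set_num_traces() allocates the arrays, set_calltrace() and
+// set_callframe() fill them in order, and the destructor frees everything.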
+class CallTraceDump: public StackObj {
+  jint _num_traces;
+  int  _index;
+  int  _frame_index;
+  JVMPI_CallTrace*  _traces;
+  JVMPI_CallFrame** _frames;
+public:
+  CallTraceDump() { _num_traces = 0; _traces = NULL; _frames = NULL; _index = 0; _frame_index = 0; }
+  ~CallTraceDump() {
+    for (int i = 0; i < _num_traces; i++) {
+      FREE_C_HEAP_ARRAY(JVMPI_CallFrame, _frames[i]);
+    }
+    FREE_C_HEAP_ARRAY(JVMPI_CallTrace, _traces);
+    FREE_C_HEAP_ARRAY(JVMPI_CallFrame*, _frames);
+  }
+  void set_calltrace(JavaThread* thread, int num_frames) {
+    assert(_traces != NULL && _index < _num_traces, "check number of calltraces generated");
+    assert(_index == -1 || _frame_index == _traces[_index].num_frames, "Previous call trace is not filled.");
+    _index++;
+    _frames[_index] = NEW_C_HEAP_ARRAY(JVMPI_CallFrame, num_frames);
+    _traces[_index].env_id = thread->jni_environment();
+    _traces[_index].num_frames = num_frames;
+    _traces[_index].frames = _frames[_index];
+    _frame_index = 0;
+  }
+  void set_callframe(jint lineno, jmethodID method_id) {
+    assert(_traces[_index].frames != NULL, "JVMPI_CallFrames must have been allocated"); 
+    assert(_frame_index < _traces[_index].num_frames, "Invalid _frame_index");
+    JVMPI_CallFrame* frame = _traces[_index].frames;
+    frame[_frame_index].lineno = lineno;
+    frame[_frame_index].method_id = method_id;
+    _frame_index++;
+  }
+  void set_num_traces(jint num_traces) { 
+    _num_traces = num_traces; 
+    _index = -1;
+    _frame_index = -1;
+    if (num_traces > 0) {
+      _traces = NEW_C_HEAP_ARRAY(JVMPI_CallTrace, num_traces);
+      _frames = NEW_C_HEAP_ARRAY(JVMPI_CallFrame*, num_traces);
+    } else {
+      _traces = NULL;
+      _frames = NULL;
+    }
+  }
+  jint get_num_traces() { return _num_traces; }
+  JVMPI_CallTrace* get_calltraces() { 
+    assert(_index == (_num_traces - 1), "Not all call traces are filled");
+    assert(_frame_index == _traces[_index].num_frames, "The last call trace is not filled");
+    return _traces; 
+  }
+};
+
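+// Per-record sizes (in bytes) of the JVMPI GC root dump entries: a one-byte
+// record type tag plus the oop/JNIEnv pointers and u4 fields each record carries.
+// root_dump_size() uses these to pre-size the dump buffer before the write pass.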
+const jint ROOT_JNI_GLOBAL_SIZE   = (1 + BytesPerWord * 2);
+const jint ROOT_JNI_LOCAL_SIZE    = (1 + BytesPerWord * 2 + 4);
+const jint ROOT_JAVA_FRAME_SIZE   = (1 + BytesPerWord * 2 + 4);
+const jint ROOT_NATIVE_STACK_SIZE = (1 + BytesPerWord * 2);
+const jint ROOT_STICKY_CLASS_SIZE = (1 + BytesPerWord);
+const jint ROOT_THREAD_BLOCK_SIZE = (1 + BytesPerWord * 2);
+const jint ROOT_MONITOR_USED_SIZE = (1 + BytesPerWord);
+const jint ROOT_UNKNOWN_SIZE      = (1 + BytesPerWord);
+const jint INIT_ROOTS_ARRAY_SIZE  = 256;
+
+class HeapDumpInfoCollector: public StackObj {
+ private:
+  jint                  _num_threads;
+  RootElementForThread* _threadRootInfo;
+  GrowableArray<oop*>*  _jni_global_roots;
+  GrowableArray<oop>*   _sticky_class_roots;
+  GrowableArray<oop>*   _monitor_used_roots;
+  GrowableArray<oop>*   _unknown_roots;
+
+  void collect_roots();
+  void add_root_to_thread(jint root_type, oop root, JavaThread* thread = NULL, intptr_t* sp = NULL, oop* obj_p = NULL);
+  void set_curRootThread(JavaThread *thread);
+  RootElementForThread* curRootThread;
+  bool                  is_collect_roots;
+
+ public:
+
+  // HeapDumpInfoCollector collects call traces and, if collect_gc_roots is
+  // true, GC root references as well.
+  HeapDumpInfoCollector(bool collect_gc_roots);
+  ~HeapDumpInfoCollector();
+
+  bool is_jni_local(JavaThread* thread, intptr_t* sp, oop* obj_p);
+  void add_root(jint root_type, oop root, JavaThread* thread = NULL, intptr_t* sp = NULL, oop* obj_p = NULL);
+  void add_root(jint root_type, oop* root); // JNI global reference
+  jlong root_dump_size() const;
+  void dump_roots(Dump* dump) const;
+  void dump_calltraces(CallTraceDump* traces) const;
+
+  static void sort_roots(GrowableArray<oop>* roots);
+};
+
+
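+// Per-frame bookkeeping for GC root collection: remembers the frame id, depth,
+// method and line number, and accumulates the Java-frame roots and JNI local
+// roots found while traversing that frame.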
+class RootElementForFrame : public CHeapObj {
+ private:
+  intptr_t* _frame_id;
+  jint      _depth;
+  bool      _is_native_method;
+  jint      _lineno; 
+  jmethodID _method_id;
+  GrowableArray<oop>*  _roots;
+  GrowableArray<oop>*  _jni_local_roots;
+  GrowableArray<oop*>* _jni_local_refs;
+  RootElementForFrame* _next;
+
+ public:
+  RootElementForFrame(intptr_t* id, bool is_native, jmethodID mid = 0, jint lineno = 0, jint d = 0) {
+    _frame_id = id;
+    _is_native_method = is_native;
+    _method_id = mid;
+    _lineno = lineno;
+    _depth = d;
+    _next = NULL;
+    _roots = NULL;
+    _jni_local_roots = NULL;
+    _jni_local_refs = NULL;
+  }
+  ~RootElementForFrame() {
+    if (_roots != NULL) {
+      delete _roots;
+    }
+    if (_jni_local_roots != NULL) {
+      delete _jni_local_roots;
+      delete _jni_local_refs;
+    }
+  };
+  RootElementForFrame* next()           { return _next; }
+  void set_next(RootElementForFrame* p) { _next = p; }
+  void set_depth(jint d)                { _depth = d; }
+  jint lineno()                         { return _lineno; } 
+  jmethodID method_id()                 { return _method_id; } 
+  intptr_t* frame_id()                  { return _frame_id; }
+  bool is_jni_local(oop* obj_p) {
+    if (_jni_local_refs == NULL) return false;
+
+    int length = _jni_local_refs->length();
+    for (int i = 0; i < length; i++) {
+      if (_jni_local_refs->at(i) == obj_p) {
+        return true;
+      }
+    }
+    return false;
+  }
+  void add_root(oop obj) {
+    if (_roots == NULL) {
+      _roots = new (ResourceObj::C_HEAP) GrowableArray<oop>(INIT_ROOTS_ARRAY_SIZE, true);
+    }
+    _roots->append(obj); 
+  }
+  void add_jni_local(oop obj, oop* obj_p) {
+    assert(obj_p != NULL, "JNI local ref");
+    if (_jni_local_roots == NULL) {
+      _jni_local_roots = new (ResourceObj::C_HEAP) GrowableArray<oop>(INIT_ROOTS_ARRAY_SIZE, true);
+      _jni_local_refs = new (ResourceObj::C_HEAP) GrowableArray<oop*>(INIT_ROOTS_ARRAY_SIZE, true);
+    }
+    _jni_local_roots->append(obj);
+    _jni_local_refs->append(obj_p);
+  }
+  void sort_roots() {
+    HeapDumpInfoCollector::sort_roots(_roots);
+    HeapDumpInfoCollector::sort_roots(_jni_local_roots);
+  }
+  void dump_roots(Dump* dump, JNIEnv* env_id) const;
+  jlong root_dump_size() const;
+};
+
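+// Per-thread bookkeeping for GC root collection: builds a list of
+// RootElementForFrame entries from the thread's stack (its call trace) and
+// accumulates the thread's native-stack and thread-block roots.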
+class RootElementForThread : public CHeapObj {
+ private:
+  JavaThread* _thread;
+  jint        _num_frames;
+  RootElementForFrame*  _frameRootInfo;
+  RootElementForFrame*  _empty_java_frame;
+  GrowableArray<oop>*   _native_stack_roots;
+  GrowableArray<oop>*   _thread_block_roots;
+  RootElementForThread* _next;
+
+  void get_stack_trace();
+  void add_root_to_frame(jint root_type, oop root, intptr_t* sp, oop* obj_p = NULL);
+  RootElementForFrame* curRootFrame;
+
+ public:
+  RootElementForThread(JavaThread* t, bool is_collect_roots);
+  ~RootElementForThread();
+
+  RootElementForFrame* get_frame(intptr_t* id);
+  RootElementForThread* next()           { return _next; }
+  void set_next(RootElementForThread* p) { _next = p; }
+  JavaThread* thread()                   { return _thread; }
+  bool is_jni_local(intptr_t* sp, oop* obj_p);
+  void add_root(jint root_type, oop root, intptr_t* sp, oop* obj_p = NULL);
+  void sort_roots() {
+    if (_num_frames == 0) {
+      _empty_java_frame->sort_roots();
+    } else {
+      for (RootElementForFrame* p = _frameRootInfo; p != NULL; p = p->next()) {
+        p->sort_roots();
+      }    
+    }
+
+    HeapDumpInfoCollector::sort_roots(_native_stack_roots);
+    HeapDumpInfoCollector::sort_roots(_thread_block_roots);
+  }
+  void dump_roots(Dump* dump) const;
+  jlong root_dump_size() const;
+  void dump_calltrace(CallTraceDump* dump) const;
+};
+
+// Implementation of CollectRootOopsClosure::do_oop()
+void CollectRootOopsClosure::do_oop(oop* obj_p) {
+  oop obj = *obj_p;
+  RootType type = typesStack->top();
+  bool is_klass = false;
+
+  if (obj == NULL || 
+      (type == _system_class && !obj->is_klass()) || // Skip if not a klass for system class roots
+      (type != _system_class && !obj->is_instance() && !obj->is_typeArray() && !obj->is_objArray())) { 
+      return;
+  }
+
+  if (obj->is_klass()) {
+    if (obj->blueprint()->oop_is_instanceKlass() || obj->blueprint()->oop_is_typeArrayKlass() || obj->blueprint()->oop_is_objArrayKlass()) {
+      obj = Klass::cast((klassOop)obj)->java_mirror();
+      is_klass = true;
+    }
+  }
+
+  switch (type) {
+    case _unknown:
+      _rc->add_root(JVMPI_GC_ROOT_UNKNOWN, obj);
+      break;
+    case _jni_handle:
+      if (obj == JNIHandles::deleted_handle()) {
+        // skip deleted handles
+        break;
+      }
+      if (_thread == NULL) {
+        _rc->add_root(JVMPI_GC_ROOT_JNI_GLOBAL, obj_p);
+      } else {
+        _rc->add_root(JVMPI_GC_ROOT_JNI_LOCAL, obj, _thread, _frame_id, obj_p);
+      }
+      break;
+    case _stack_frame:
+      if (_is_native_frame) {
+        _rc->add_root(JVMPI_GC_ROOT_NATIVE_STACK, obj, _thread);
+      } else if (_is_entry_frame) {
+        // JNI local refs in an entry frame have been traversed separately earlier.
+        // So skip these JNI local refs when they are traversed again in oops_do()
+        // call for this entry frame.
+
+        if (obj != JNIHandles::deleted_handle() && !_rc->is_jni_local(_thread, _frame_id, obj_p)) {
+          _rc->add_root(JVMPI_GC_ROOT_JAVA_FRAME, obj, _thread, _frame_id);
+        }
+      } else {
+        _rc->add_root(JVMPI_GC_ROOT_JAVA_FRAME, obj, _thread, _frame_id);
+      }
+      break;
+    case _system_class:
+      if (is_klass) {
+        _rc->add_root(JVMPI_GC_ROOT_STICKY_CLASS, obj);
+      }
+      break;
+    case _thread_block:
+      assert(_thread != NULL, "NULL thread for CollectRootOopsClosure::_thread_block type");
+      _rc->add_root(JVMPI_GC_ROOT_THREAD_BLOCK, obj, _thread);
+      break;
+    case _monitor_used:
+      _rc->add_root(JVMPI_GC_ROOT_MONITOR_USED, obj);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Implementation of RootElementForFrame class
+void RootElementForFrame::dump_roots(Dump* dump, JNIEnv* env_id) const {
+  int length, i;
+
+  length = (_roots != NULL ? _roots->length() : 0);
+  for (i = 0; i < length; i++) {
+    dump->dump_type(JVMPI_GC_ROOT_JAVA_FRAME);
+    dump->dump_oop(_roots->at(i));
+    dump->dump_thread(env_id);
+    dump->dump_u4(_depth);
+  }
+  
+  length = (_jni_local_roots != NULL ? _jni_local_roots->length() : 0);
+  for (i = 0; i < length; i++) {
+    dump->dump_type(JVMPI_GC_ROOT_JNI_LOCAL);
+    dump->dump_oop(_jni_local_roots->at(i));
+    dump->dump_thread(env_id);
+    dump->dump_u4(_depth);
+  }
+}
+
+jlong RootElementForFrame::root_dump_size() const {
+  jlong size = (_roots != NULL ? _roots->length() : 0) * ROOT_JAVA_FRAME_SIZE;
+  size += (_jni_local_roots != NULL ? _jni_local_roots->length() : 0) * ROOT_JNI_LOCAL_SIZE;
+
+  return size;
+};
+
+// Implementation of RootElementForThread class
+RootElementForThread::RootElementForThread(JavaThread* t, bool is_collect_roots) {
+  _thread = t;
+  _next = NULL;
+  _frameRootInfo = NULL;
+  _empty_java_frame = NULL;
+  _thread_block_roots = NULL;
+  _native_stack_roots = NULL;
+  _num_frames = 0;
+  curRootFrame = NULL;
+
+  if (is_collect_roots) {
+    // create root arrays for collecting roots
+    _native_stack_roots = new (ResourceObj::C_HEAP)GrowableArray<oop>(INIT_ROOTS_ARRAY_SIZE, true);
+    _thread_block_roots = new (ResourceObj::C_HEAP)GrowableArray<oop>(INIT_ROOTS_ARRAY_SIZE, true);
+  }
+
+  get_stack_trace();
+}
+
+RootElementForThread::~RootElementForThread() {
+  RootElementForFrame* p = _frameRootInfo;
+  while (p != NULL) {
+    RootElementForFrame *q = p;
+    p = p->next();
+    delete(q);
+  }
+  delete _empty_java_frame;
+  if (_native_stack_roots != NULL) {
+    delete _native_stack_roots;
+  }
+  if (_thread_block_roots != NULL) {
+    delete _thread_block_roots;
+  }
+}
+
+void RootElementForThread::get_stack_trace(){
+  assert(_thread->thread_state() != _thread_in_Java, "All threads must be blocked at safepoint");
+
+  if (!_thread->has_last_Java_frame()) {
+    _empty_java_frame = new RootElementForFrame(0, false);
+    _empty_java_frame->set_depth(-1);
+    return;
+  }
+
+  vframeStream vfst(_thread);
+  RootElementForFrame* last = NULL;
+  int count = 0;
+
+  // Get call trace for this JavaThread
+  for (; !vfst.at_end(); vfst.next(), count++) {
+    methodOop m = vfst.method(); // The method is not stored GC safe
+    int bci     = vfst.bci();
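+    // -3 is used as the line number for native methods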
+    int lineno  = m->is_native() ? (-3) : m->line_number_from_bci(bci);
+
+    RootElementForFrame* p = new RootElementForFrame(vfst.frame_id(),
+                                                     m->is_native(),
+                                                     m->jmethod_id(),
+                                                     lineno);
+    if (last == NULL) {
+      _frameRootInfo = p;
+    } else {
+      last->set_next(p);
+    }
+    last = p;
+  }
+
+  _num_frames = count;
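+  // Assign depths so that the top-of-stack frame gets the largest value.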
+  for (RootElementForFrame* p = _frameRootInfo; p != NULL; p = p->next(), count--) {
+    p->set_depth(count);
+  }
+}
+
+RootElementForFrame* RootElementForThread::get_frame(intptr_t* id) {
+  if (_num_frames == 0) {
+    return _empty_java_frame;
+  }
+
+  if (id == NULL) {
+    // set to the top vframe
+    return _frameRootInfo;
+  } else if (curRootFrame == NULL || curRootFrame->frame_id() != id) {
+    // find the one with a matching id
+    curRootFrame = NULL;
+    for (RootElementForFrame* p = _frameRootInfo; p != NULL; p = p->next()) {
+      if (p->frame_id() == id) {
+        curRootFrame = p;
+        return curRootFrame;
+      }
+    }
+  }
+  return curRootFrame;
+}
+
+bool RootElementForThread::is_jni_local(intptr_t* id, oop* obj_p) {
+  RootElementForFrame* fr = get_frame(id);
+
+  assert(fr != NULL, "Java Frame not found");
+  return fr->is_jni_local(obj_p);
+}
+
+void RootElementForThread::add_root_to_frame(jint root_type, oop root, intptr_t* id, oop* obj_p) {
+  RootElementForFrame* fr = get_frame(id);
+
+  assert(fr != NULL, "Java Frame not found");
+
+  if (root_type == JVMPI_GC_ROOT_JNI_LOCAL) {
+    fr->add_jni_local(root, obj_p);
+  } else {
+    fr->add_root(root);
+  }
+}
+
+
+void RootElementForThread::add_root(jint root_type, oop root, intptr_t* id, oop* obj_p) {
+  switch (root_type) {
+    case JVMPI_GC_ROOT_JNI_LOCAL:
+      add_root_to_frame(root_type, root, id, obj_p);
+      break;
+    case JVMPI_GC_ROOT_JAVA_FRAME:
+      add_root_to_frame(root_type, root, id);
+      break;
+    case JVMPI_GC_ROOT_NATIVE_STACK:
+      _native_stack_roots->append(root);
+      break;
+    case JVMPI_GC_ROOT_THREAD_BLOCK:
+      _thread_block_roots->append(root);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+jlong RootElementForThread::root_dump_size() const {
+  jlong size = (_empty_java_frame != NULL ? _empty_java_frame->root_dump_size() : 0) + 
+              (_native_stack_roots->length() * ROOT_NATIVE_STACK_SIZE) +
+              (_thread_block_roots->length() * ROOT_THREAD_BLOCK_SIZE);
+
+  for (RootElementForFrame* p = _frameRootInfo; p != NULL; p = p->next()) {
+    size += p->root_dump_size();
+  }
+
+  return size;
+};
+
+void RootElementForThread::dump_roots(Dump* dump) const {
+  JNIEnv* env_id = _thread->jni_environment();
+
+  if (_num_frames == 0) {
+    _empty_java_frame->dump_roots(dump, env_id);
+  } else {
+    for (RootElementForFrame* p = _frameRootInfo; p != NULL; p = p->next()) {
+      p->dump_roots(dump, env_id);
+    }
+  }  
+  
+  int length, i;
+
+  length = _native_stack_roots->length();
+  for (i = 0; i < length; i++) {
+    dump->dump_type(JVMPI_GC_ROOT_NATIVE_STACK);
+    dump->dump_oop(_native_stack_roots->at(i));
+    dump->dump_thread(env_id);
+  }
+  length = _thread_block_roots->length();
+  for (i = 0; i < length; i++) {
+    dump->dump_type(JVMPI_GC_ROOT_THREAD_BLOCK);
+    dump->dump_oop(_thread_block_roots->at(i));
+    dump->dump_thread(env_id);
+  }
+}
+
+void RootElementForThread::dump_calltrace(CallTraceDump* dump) const {
+  dump->set_calltrace(_thread, _num_frames);
+  for (RootElementForFrame* p = _frameRootInfo; p != NULL; p = p->next()) {
+    dump->set_callframe(p->lineno(), p->method_id());
+  }
+}
+
+// Implementation of HeapDumpInfoCollector
+HeapDumpInfoCollector::HeapDumpInfoCollector(bool collect_gc_roots) {
+  // initialize _threadRootInfo before collecting roots
+  RootElementForThread* q = NULL;
+  _num_threads = 0;
+  for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) {
+    RootElementForThread* p = new RootElementForThread(thread, collect_gc_roots);
+    if (q == NULL) {
+      _threadRootInfo = p;
+    } else {
+      q->set_next(p);
+    }
+    q = p;
+    _num_threads++;
+  }
+
+  if (collect_gc_roots) {
+    _jni_global_roots = new (ResourceObj::C_HEAP) GrowableArray<oop*>(INIT_ROOTS_ARRAY_SIZE, true);
+    _sticky_class_roots = new (ResourceObj::C_HEAP) GrowableArray<oop>(INIT_ROOTS_ARRAY_SIZE, true);
+    _monitor_used_roots = new (ResourceObj::C_HEAP) GrowableArray<oop>(INIT_ROOTS_ARRAY_SIZE, true);
+    _unknown_roots = new (ResourceObj::C_HEAP) GrowableArray<oop>(INIT_ROOTS_ARRAY_SIZE, true);
+    curRootThread = NULL;
+    collect_roots(); 
+  }
+  is_collect_roots = collect_gc_roots;
+}
+
+HeapDumpInfoCollector::~HeapDumpInfoCollector() {
+  RootElementForThread* p = _threadRootInfo;
+  while (p != NULL) {
+    RootElementForThread *q = p;
+    p = p->next();
+    delete(q);
+  }
+    
+  if (is_collect_roots) {
+    delete _jni_global_roots;
+    delete _sticky_class_roots;
+    delete _monitor_used_roots;
+    delete _unknown_roots;
+  }
+}
+
+// Collect roots for heap dump
+// Note: the current implementation of collect_roots() requires explicit knowledge
+// of the GC strong roots as well as explicit knowledge of frames.  It may need to
+// be updated if the VM's internal structures change; watch for future changes to
+// the oops_do() methods.
+//
+// An alternative would be to add new methods (no-ops by default) to the OopClosure
+// class for passing additional profiling information, and to modify the oops_do()
+// methods in the various classes to call them with the root type information.
+// However, changing OopClosure would compromise its simplicity and its semantics,
+// so we chose the current implementation.
+//
+void HeapDumpInfoCollector::collect_roots() {
+  CollectRootOopsClosure blk(this);
+
+  // Traverse all system classes
+  blk.begin_iterate(CollectRootOopsClosure::_system_class);
+  SystemDictionary::always_strong_oops_do(&blk);
+  blk.end_iterate(CollectRootOopsClosure::_system_class);
+
+  // Traverse all JNI Global references
+  blk.set_thread(NULL);
+  blk.begin_iterate(CollectRootOopsClosure::_jni_handle);
+  JNIHandles::oops_do(&blk);   // Global (strong) JNI handles
+  blk.end_iterate(CollectRootOopsClosure::_jni_handle);
+
+  // Traverse all monitor objects
+  blk.begin_iterate(CollectRootOopsClosure::_monitor_used);
+  ObjectSynchronizer::oops_do(&blk);
+  blk.end_iterate(CollectRootOopsClosure::_monitor_used);
+
+  // Traverse JNI locals and frames for all Java threads 
+  RootElementForFrame *prev_reff = NULL;
+  for (JavaThread* thread = Threads::first(); thread != NULL ; thread = thread->next()) {  
+    blk.set_thread(thread);
+    set_curRootThread(thread);
+
+    // get all JNI local references for the top frame
+    blk.begin_iterate(CollectRootOopsClosure::_jni_handle);
+    thread->active_handles()->oops_do(&blk);
+    blk.end_iterate(CollectRootOopsClosure::_jni_handle);
+
+    // Traverse the execution stack    
+    blk.begin_iterate(CollectRootOopsClosure::_stack_frame);
+    if (thread->has_last_Java_frame()) {
+      for(StackFrameStream fst(thread); !fst.is_done(); fst.next()) {
+        frame* fr = fst.current();
+
+        // skip the first entry frame
+        if (fr->is_first_frame()) continue;
+
+        blk.set_frame_type(fr->is_native_frame(), fr->is_entry_frame());
+        if (fr->is_entry_frame()) {
+          // An entry frame is considered part of the previous Java
+          // frame on the stack. Use the id from the previous frame
+          // that was found on the RootElementForFrame list.
+          assert(prev_reff != NULL, "must have previous frame");
+          blk.set_frame_id(prev_reff->frame_id());
+
+          // traverse the JNI local refs stored in JavaCallWrapper for an entry frame
+          blk.begin_iterate(CollectRootOopsClosure::_jni_handle);
+          fr->entry_frame_call_wrapper()->handles()->oops_do(&blk);
+          blk.end_iterate(CollectRootOopsClosure::_jni_handle);
+
+        } else {
+          // remember id of the current frame for frame information in the oops traversal.
+          blk.set_frame_id(fr->id());
+        }
+        fr->oops_do(&blk, fst.register_map());
+
+        // If the current frame is found on the RootElementForFrame
+        // list, then save it for a possible "entry frame" later.
+        RootElementForFrame *reff = curRootThread->get_frame(fr->id());
+        if (reff != NULL) {
+          prev_reff = reff;
+        }
+      }
+    }
+    blk.end_iterate(CollectRootOopsClosure::_stack_frame);
+  }
+
+  // sort and remove duplicates
+  // no need to sort _jni_global_roots because all JNI global references are
+  // traversed only once.
+  for (RootElementForThread* p = _threadRootInfo; p != NULL; p = p->next()) {
+    p->sort_roots();
+  }
+  sort_roots(_sticky_class_roots);
+  sort_roots(_monitor_used_roots);
+  sort_roots(_unknown_roots);
+}
+
+static int cmp(oop* x, oop* y) { return (oopDesc*)*x - (oopDesc*)*y; }
+void HeapDumpInfoCollector::sort_roots(GrowableArray<oop>* roots) {
+  if (roots == NULL) return;
+
+  // sort roots
+  roots->sort(cmp);
+
+  // remove duplicates by compacting array
+  const int len = roots->length();
+  oop obj = NULL; // we don't need NULL roots
+  int j = 0;
+  for (int i = 0; i < len; i++) {
+    assert(i >= j, "algorithmic error");
+    if (roots->at(i) != obj) {
+      obj = roots->at(i);
+      roots->at_put(j++, obj);
+    }
+  }
+  roots->trunc_to(j);
+  assert(roots->length() == j, "just checking");
+}
+
+void HeapDumpInfoCollector::set_curRootThread(JavaThread *thread) {
+  if (curRootThread == NULL || curRootThread->thread() != thread) {
+    curRootThread = NULL;
+    for (RootElementForThread* p = _threadRootInfo; p != NULL; p = p->next()) {
+      if (p->thread() == thread) {
+        curRootThread = p;
+        break;
+      }
+    }
+  }
+  assert(curRootThread != NULL, "Thread not found");
+}
+
+bool HeapDumpInfoCollector::is_jni_local(JavaThread* thread, intptr_t* sp, oop* obj_p) {
+  set_curRootThread(thread);
+  return curRootThread->is_jni_local(sp, obj_p);
+}
+
+jlong HeapDumpInfoCollector::root_dump_size() const {
+  jlong size = (_jni_global_roots->length() * ROOT_JNI_GLOBAL_SIZE) +
+              (_sticky_class_roots->length() * ROOT_STICKY_CLASS_SIZE) +
+              (_monitor_used_roots->length() * ROOT_MONITOR_USED_SIZE) +
+              (_unknown_roots->length() * ROOT_UNKNOWN_SIZE);
+
+  for (RootElementForThread* p = _threadRootInfo; p != NULL; p = p->next()) {
+    size += p->root_dump_size();
+  }
+  return size;
+}
+
+void HeapDumpInfoCollector::dump_roots(Dump* dump) const {
+  for (RootElementForThread* p = _threadRootInfo; p != NULL; p = p->next()) {
+    p->dump_roots(dump);
+  }
+
+  int length, i;
+
+  length = _jni_global_roots->length();
+  for (i = 0; i < length; i++) {
+    oop* handle = _jni_global_roots->at(i);
+    oop obj = *handle;
+
+    dump->dump_type(JVMPI_GC_ROOT_JNI_GLOBAL);
+    if (obj->is_klass()) {
+      obj = Klass::cast((klassOop)obj)->java_mirror();
+    }
+    dump->dump_oop(obj);
+    dump->dump_voids((void*) handle);
+  }
+  length = _sticky_class_roots->length();
+  for (i = 0; i < length; i++) {
+    dump->dump_type(JVMPI_GC_ROOT_STICKY_CLASS);
+    dump->dump_oop(_sticky_class_roots->at(i));
+  }
+  length = _monitor_used_roots->length();
+  for (i = 0; i < length; i++) {
+    dump->dump_type(JVMPI_GC_ROOT_MONITOR_USED);
+    dump->dump_oop(_monitor_used_roots->at(i));
+  }
+  length = _unknown_roots->length();
+  for (i = 0; i < length; i++) {
+    dump->dump_type(JVMPI_GC_ROOT_UNKNOWN);
+    dump->dump_oop(_unknown_roots->at(i));
+  }
+
+}
+
+void HeapDumpInfoCollector::add_root_to_thread(jint root_type, oop root, JavaThread* thread, intptr_t* sp, oop* obj_p) {
+  set_curRootThread(thread);
+  curRootThread->add_root(root_type, root, sp, obj_p);
+}
+
+void HeapDumpInfoCollector::add_root(jint root_type, oop* root) {
+  assert(root_type == JVMPI_GC_ROOT_JNI_GLOBAL, "Must be JNI globals");
+
+  bool is_root = true;
+  int length = (skipped_globalrefs != NULL ? skipped_globalrefs->length() : 0);
+  for (int i = 0; i < length; i++) {
+    if (skipped_globalrefs->at(i) == (jobject) root) {
+      is_root = false;
+      break;
+    }
+  }
+
+  if (is_root) {
+    _jni_global_roots->append(root);
+  }
+}
+
+void HeapDumpInfoCollector::add_root(jint root_type, oop root, JavaThread* thread, intptr_t* sp, oop* obj_p) {
+  switch (root_type) {
+    case JVMPI_GC_ROOT_UNKNOWN:
+      _unknown_roots->append(root);
+      break;
+    case JVMPI_GC_ROOT_JNI_LOCAL:
+      add_root_to_thread(root_type, root, thread, sp, obj_p);
+      break;
+    case JVMPI_GC_ROOT_JAVA_FRAME:
+    case JVMPI_GC_ROOT_NATIVE_STACK:
+      add_root_to_thread(root_type, root, thread, sp);
+      break;
+    case JVMPI_GC_ROOT_STICKY_CLASS:
+      _sticky_class_roots->append(root);
+      break;
+    case JVMPI_GC_ROOT_THREAD_BLOCK:
+      add_root_to_thread(root_type, root, thread, sp);
+      break;
+    case JVMPI_GC_ROOT_MONITOR_USED:
+      _monitor_used_roots->append(root);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void HeapDumpInfoCollector::dump_calltraces(CallTraceDump* dump) const {
+  dump->set_num_traces(_num_threads);
+  for (RootElementForThread* p = _threadRootInfo; p != NULL; p = p->next()) {
+    p->dump_calltrace(dump);
+  }
+}
+
+void jvmpi::post_object_dump_event(oop obj, int flag) {
+  No_GC_Verifier nogc;
+  Dump dump;
+  // 1st dump to measure dump size
+  { ObjectDumper od(&dump, JVMPI_DUMP_LEVEL_2, obj); }
+  // 2nd dump to actually write dump
+  dump.enable_write(dump.size());
+  { ObjectDumper od(&dump, JVMPI_DUMP_LEVEL_2, obj); }
+  // create event
+  JVMPI_Event event;
+  event.event_type             = JVMPI_EVENT_OBJECT_DUMP | flag;
+  event.u.heap_dump.begin      = (char*)dump.begin();
+  event.u.heap_dump.end        = (char*)dump.end  ();
+  event.u.heap_dump.num_traces = 0;
+  event.u.heap_dump.traces     = NULL;
+  // post event
+  post_event_vm_mode(&event, NULL);
+}
+
+class VM_JVMPIPostHeapDump: public VM_Operation {
+ private:
+  Dump* _dump;
+  int   _level;
+  int   _flag;
+  CallTraceDump* _traces;
+ public:
+  VM_JVMPIPostHeapDump(Dump* dump, int level, int flag, CallTraceDump *traces) { 
+    _dump   = dump; 
+    _level  = level; 
+    _flag   = flag;  
+    _traces = traces;
+  }
+  void doit() {
+    // 1st heap dump to measure dump size for heap objects
+    { JvmpiHeapDumper hd(_dump, _level); }
+    // collect VM roots and dump them
+    if (_level == JVMPI_DUMP_LEVEL_0) {
+      // dump level 0 => no roots
+      HeapDumpInfoCollector rd(false);
+      _dump->enable_write(_dump->size());
+      rd.dump_calltraces(_traces);
+    } else {
+      // dump level 1 & 2 => include roots
+      HeapDumpInfoCollector rd(true);
+      debug_only(int heap_dump_size = _dump->size());
+      debug_only(int gc_root_dump_size = rd.root_dump_size());
+
+      _dump->enable_write((int) rd.root_dump_size() + _dump->size());
+      rd.dump_roots(_dump);
+      rd.dump_calltraces(_traces);
+      assert((int) rd.root_dump_size() == _dump->size(), "dump size inconsistent");
+    }
+    // 2nd heap dump to actually write heap objects
+    { JvmpiHeapDumper hd(_dump, _level); }
+
+    // Disable GC to prevent GC from happening before the agent 
+    // finishes processing the heap dump.
+
+    GC_locker::lock();
+  }
+  const char* name() const { return "post JVMPI heap dump"; }
+};
+
+
+void jvmpi::post_heap_dump_event_in_safepoint(int level, int flag) {
+  Dump dump;
+  CallTraceDump traces;
+
+  {
+    // We must acquire the Heap_lock before collecting heap dump 
+    MutexLocker ml(Heap_lock);
+
+    // We count and collect the heap information at a safepoint
+    VM_JVMPIPostHeapDump op(&dump, level, flag, &traces);
+    VMThread::execute(&op);
+  }
+
+  // Create and post the event in the JavaThread
+  // We don't put this in a doit_epilogue to avoid exposing the Dump class
+  //  assert(Thread::current()->is_Java_thread(), "must be in JavaThread");
+  JVMPI_Event event;
+  event.event_type             = JVMPI_EVENT_HEAP_DUMP | flag;
+  event.u.heap_dump.dump_level = level;
+  event.u.heap_dump.begin      = (char*)dump.begin();
+  event.u.heap_dump.end        = (char*)dump.end  ();
+  event.u.heap_dump.num_traces = traces.get_num_traces();
+  event.u.heap_dump.traces     = traces.get_calltraces();
+  // post event
+  post_event_vm_mode(&event, NULL);
+
+  // Enable GC
+  GC_locker::unlock();
+}
+
+
+class VM_JVMPIPostMonitorDump: public VM_Operation {
+ private:
+  Dump* _dump;
+  int   _flag;
+ public:
+  VM_JVMPIPostMonitorDump(Dump* dump, int flag) { _dump = dump; _flag = flag; }
+  void doit() {
+    // 1st dump to measure dump size
+    { JavaMonitorDumper md(_dump); 
+      RawMonitorDumper rmd(_dump);
+    }
+    // 2nd dump to actually write dump
+    _dump->enable_write(_dump->size());
+    { JavaMonitorDumper md(_dump); 
+      RawMonitorDumper rmd(_dump);
+    }
+  }
+  const char* name() const { return "post JVMPI monitor dump"; }
+};
+
+
+void jvmpi::post_monitor_dump_event_in_safepoint(int flag) {
+  Dump dump;
+  // We count and collect the monitor information at a safepoint
+  VM_JVMPIPostMonitorDump op(&dump, flag);
+  VMThread::execute(&op);
+  // Create and post the event in the JavaThread
+  // We don't put this in a doit_epilogue to avoid exposing the Dump class
+//  assert(Thread::current()->is_Java_thread(), "must be in JavaThread");
+  JVMPI_Event event;
+  event.event_type = JVMPI_EVENT_MONITOR_DUMP | flag;
+  event.u.monitor_dump.begin          = (char*)dump.begin();
+  event.u.monitor_dump.end            = (char*)dump.end  ();
+  event.u.monitor_dump.num_traces     = 0;
+  event.u.monitor_dump.threads_status = 0;
+  // post event
+  post_event_vm_mode(&event, NULL);
+}
+
+
+bool should_invalidate_nmethods(jint event_type) {
+  switch (event_type) {
+    case JVMPI_EVENT_METHOD_ENTRY : // fall through
+    case JVMPI_EVENT_METHOD_ENTRY2: // fall through
+    case JVMPI_EVENT_METHOD_EXIT  : return true;
+  }
+  return false;
+}
+
+
+void invalidate_nmethods() {
+  // We would need to deoptimize all frames (or at least make all compiled
+  // methods non-entrant); for the moment this is a no-op.
+}
+
+
+bool needs_slow_allocation(jint event_type) {
+  switch(event_type) {
+    case JVMPI_EVENT_OBJECT_ALLOC       : // fall through
+    case JVMPI_EVENT_OBJECT_MOVE        : // fall through
+    case JVMPI_EVENT_OBJECT_FREE        : // fall through
+    case JVMPI_EVENT_ARENA_NEW          : // fall through
+    case JVMPI_EVENT_DELETE_ARENA       : // fall through
+    case JVMPI_EVENT_JNI_GLOBALREF_ALLOC: // fall through
+    case JVMPI_EVENT_JNI_GLOBALREF_FREE : return true;
+  }
+  return false;
+}
+
+void jvmpi::reset_jvmpi_allocation() {
+  bool use_jvmpi_allocation = (is_event_enabled(JVMPI_EVENT_OBJECT_ALLOC) ||
+                               is_event_enabled(JVMPI_EVENT_OBJECT_MOVE)  ||
+                               is_event_enabled(JVMPI_EVENT_OBJECT_FREE)  ||
+                               is_event_enabled(JVMPI_EVENT_ARENA_NEW)    ||
+                               is_event_enabled(JVMPI_EVENT_DELETE_ARENA) ||
+                               is_event_enabled(JVMPI_EVENT_JNI_GLOBALREF_ALLOC) ||
+                               is_event_enabled(JVMPI_EVENT_JNI_GLOBALREF_FREE));
+
+  if (use_jvmpi_allocation && !slow_allocation) {
+    // Enable slow allocation
+
+    slow_allocation = true;
+    Universe::set_jvmpi_alloc_event_enabled(Universe::_jvmpi_enabled);
+
+    // Note:  I think disabling GC-events should be done only 
+    // during startup time.  When the agent is ready to handle
+    // GC-events, we should report it.  As this piece of code
+    // has been there for a while, I just leave it as it is but 
+    // we should look into it in jvmpi 2.0.
+
+    // it is too early to report GC-events
+    bool old_gc_start = is_event_enabled(JVMPI_EVENT_GC_START);
+    bool old_gc_finish = is_event_enabled(JVMPI_EVENT_GC_FINISH);
+    disable_event(JVMPI_EVENT_GC_START);
+    disable_event(JVMPI_EVENT_GC_FINISH);
+
+    // ensure that the heap is initialized the way we want it to be;
+    // in particular, the new generation must be filled so we always
+    // perform slow allocations
+    Universe::heap()->collect(GCCause::_java_lang_system_gc);
+
+    if (old_gc_start) enable_event(JVMPI_EVENT_GC_START);
+    if (old_gc_finish) enable_event(JVMPI_EVENT_GC_FINISH);
+
+  } else if (!use_jvmpi_allocation && slow_allocation) {
+    // Disable slow allocation
+
+    slow_allocation = false;
+
+    // Do a GC to enable the heap for fast allocation since the new generation
+    // was filled up for slow allocation.  
+    // Note that fast allocation is not immediately turned on until a GC 
+    // is completed.  If GC is disabled (due to some other jvmpi events),  
+    // this GC is cancelled and the new generation is still filled up.
+
+    Universe::set_jvmpi_alloc_event_enabled(Universe::_jvmpi_disabling);
+    Universe::heap()->collect(GCCause::_java_lang_system_gc);
+  }
+}
+
+// ----------------------------------------------
+// Functions exported through the JVMPI interface
+// ----------------------------------------------
+
+JVMPI_ENTRY(jint, jvmpi::enable_event(jint event_type, void *arg))
+  if (!is_event_supported(event_type)) {
+    return JVMPI_NOT_AVAILABLE;
+  }
+
+  enable_event(event_type);
+  if (should_invalidate_nmethods(event_type)) {
+    invalidate_nmethods();
+  }
+  if (event_type == JVMPI_EVENT_OBJECT_MOVE) {
+    Universe::set_jvmpi_move_event_enabled(true);
+  } else if (event_type == JVMPI_EVENT_METHOD_ENTRY || event_type == JVMPI_EVENT_METHOD_ENTRY2) {
+    // Missing disabling of inlining
+    // Inline flag is a constant in product mode
+    // Inline = false;
+  } else if (event_type == JVMPI_EVENT_JNI_GLOBALREF_ALLOC) {
+    Universe::set_jvmpi_jni_global_alloc_event_enabled(true);
+  } else if (event_type == JVMPI_EVENT_JNI_GLOBALREF_FREE) {
+    Universe::set_jvmpi_jni_global_free_event_enabled(true);
+  } else if (event_type == JVMPI_EVENT_JNI_WEAK_GLOBALREF_ALLOC) {
+    Universe::set_jvmpi_jni_weak_global_alloc_event_enabled(true);
+  } else if (event_type == JVMPI_EVENT_JNI_WEAK_GLOBALREF_FREE) {
+    Universe::set_jvmpi_jni_weak_global_free_event_enabled(true);
+  }
+
+  // enable slow allocation, if necessary 
+  if (!slow_allocation && needs_slow_allocation(event_type)) {
+    reset_jvmpi_allocation();
+  }
+  return JVMPI_SUCCESS;
+JVMPI_END
+
+
+JVMPI_ENTRY(jint, jvmpi::disable_event(jint event_type, void *arg))
+  if (!is_event_supported(event_type)) {
+    return JVMPI_NOT_AVAILABLE;
+  }
+
+  if (should_invalidate_nmethods(event_type)) {
+    invalidate_nmethods();
+  }
+  disable_event(event_type);
+
+  if (event_type == JVMPI_EVENT_OBJECT_MOVE) {
+    Universe::set_jvmpi_move_event_enabled(false);
+  } else if (event_type == JVMPI_EVENT_JNI_GLOBALREF_ALLOC) {
+    Universe::set_jvmpi_jni_global_alloc_event_enabled(false);
+  } else if (event_type == JVMPI_EVENT_JNI_GLOBALREF_FREE) {
+    Universe::set_jvmpi_jni_global_free_event_enabled(false);
+  } else if (event_type == JVMPI_EVENT_JNI_WEAK_GLOBALREF_ALLOC) {
+    Universe::set_jvmpi_jni_weak_global_alloc_event_enabled(false);
+  } else if (event_type == JVMPI_EVENT_JNI_WEAK_GLOBALREF_FREE) {
+    Universe::set_jvmpi_jni_weak_global_free_event_enabled(false);
+  }
+
+  // disable slow allocation and use fast allocation, if necessary 
+  if (slow_allocation && needs_slow_allocation(event_type)) {
+    reset_jvmpi_allocation();
+  }
+  return JVMPI_SUCCESS;
+JVMPI_END
+
+
+JVMPI_ENTRY(void, jvmpi::disable_gc())
+  GC_locker::lock();
+JVMPI_END
+
+
+JVMPI_ENTRY(void, jvmpi::enable_gc())
+  GC_locker::unlock();
+JVMPI_END
+
+inline bool is_valid_method(methodOop method) {
+  if (method == NULL || 
+      !method->is_perm() || 
+      oop(method)->klass() != Universe::methodKlassObj() ||
+      !method->is_method()) {
+    return false;   // doesn't look good
+  }
+  return true;      // hopefully this is a method indeed
+}
+
+// Return the top-most frame that can be used for vframeStream
+// This frame will be skipped by vframeStream for stack walking.
+frame is_walkable_frame(JavaThread* thread, frame* fr, methodOop* method_p, int* bci_p) {
+  methodOop method = NULL;
+  int bci = -1;
+  frame walkframe;
+
+  if (fr->is_interpreted_frame()) {
+    // top frame is an interpreted frame 
+    // check if it is walkable (i.e. valid methodOop and valid bci)
+    if (fr->is_interpreted_frame_valid()) {
+      if (fr->fp() != NULL) {
+        // access the address directly so as not to trigger the asserts built
+        // into the interpreter_frame_method() function
+        method = *fr->interpreter_frame_method_addr();
+        if (is_valid_method(method)) {
+          intptr_t bcx = fr->interpreter_frame_bcx();
+          bci = method->validate_bci_from_bcx(bcx);
+          walkframe = *fr;
+        } else {
+          method = NULL;
+        }
+      }
+    }
+
+  } else {
+    method = NULL;
+    walkframe = *fr;
+#ifndef CORE
+    // Determine if this top frame is executing a Java method.
+    if (CodeCache::contains(fr->pc())) {
+      // top frame is a compiled frame or stub routines
+      CodeBlob* cb = CodeCache::find_blob(fr->pc());
+      if (cb->is_nmethod()) {
+        method = ((nmethod *)cb)->method();
+      }
+    }
+#endif 
+  }
+
+  if (method_p != NULL) {
+    *method_p = method;
+  }
+  if(bci_p != NULL) {
+    *bci_p = bci;
+  }
+  return walkframe;
+}
+
+// The thread we are examining must be suspended
+void fill_call_trace_at_safepoint(JavaThread* thd, JVMPI_CallTrace* trace, int depth) {
+  vframeStream st(thd); 
+
+  int count = 0;
+  // collect the rest
+  for (;!st.at_end() && count < depth; st.next(), count++) {    
+    methodOop m = st.method(); // The method is not stored GC safe
+    int bci     = st.bci();
+    int lineno  = m->is_native() ? (-3) : m->line_number_from_bci(bci);
+    trace->frames[count].method_id = m->jmethod_id();
+    trace->frames[count].lineno = lineno;
+  }
+
+  trace->num_frames = count;
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: fill_call_trace_at_safepoint return, thread: " INTPTR_FORMAT ", trace->num_frames = %d\n",
+		  thd, trace->num_frames);
+  }
+  return; 
+}
+
+void fill_call_trace_given_top(JavaThread* thd, JVMPI_CallTrace* trace, int depth, frame top_frame) {
+  frame walkframe;
+  methodOop method;
+  int bci;
+  int count;
+
+  count = 0;
+  assert(trace->frames != NULL, "trace->frames must be non-NULL");
+
+  walkframe = is_walkable_frame(thd, &top_frame, &method, &bci);
+  if (method != NULL) {
+    count++;
+    trace->num_frames = count;
+    trace->frames[0].method_id = method->jmethod_id();
+    if (!method->is_native()) {
+      trace->frames[0].lineno = method->line_number_from_bci(bci);
+    } else {
+      trace->frames[0].lineno = -3;
+    }
+  } 
+
+  // return if no walkable frame is found
+  if (walkframe.sp() == NULL) {
+    return;
+  }
+
+  // check has_last_Java_frame() after looking at the top frame
+  // which may be an interpreted Java frame.
+  if (!thd->has_last_Java_frame() && count == 0) {
+    trace->num_frames = 0;
+    return;
+  }
+
+  vframeStream st(thd, walkframe);
+  for (; !st.at_end() && count < depth; st.next(), count++) {
+    bci = st.bci();
+    method = st.method(); // The method is not stored GC safe
+
+    trace->frames[count].method_id = method->jmethod_id();
+    if (!method->is_native()) {
+      trace->frames[count].lineno = method->line_number_from_bci(bci);
+    } else {
+      trace->frames[count].lineno = -3;
+    }
+  }
+  trace->num_frames = count;
+  return;
+}
+
+JVMPI_ENTRY(void, jvmpi::get_call_trace(JVMPI_CallTrace *trace, jint depth))
+  JavaThread* thd;
+  ResourceMark rm;
+
+  trace->num_frames = 0;
+  if (!((trace->env_id) && (thd = JavaThread::thread_from_jni_environment(trace->env_id))))  {
+    return;
+  }
+
+  // ensure thread suspension completed for other threads
+  // Note: need to ensure hprof agent actually suspends threads
+  // May need to temporarily suspend thread for the caller
+  uint32_t debug_bits = 0;
+  if (thd != Thread::current()) {
+    if (!thd->wait_for_ext_suspend_completion(SuspendRetryCount,
+        SuspendRetryDelay, &debug_bits)) {
+      return;
+    }
+  }
+
+  switch (thd->thread_state()) {
+    // The thread is either in the VM or in native code so use information
+    // from the last Java frame.
+    case _thread_blocked:
+    case _thread_in_native:
+    case _thread_in_vm:    
+      if (thd->has_last_Java_frame()) {
+        fill_call_trace_at_safepoint(thd, trace, depth);
+      }
+      break;
+    case _thread_in_Java:  
+      { frame fr;
+        trace->num_frames = 0;
+        // profile_last_Java_frame() sets up the frame 'fr' and returns true on success
+        if (thd->profile_last_Java_frame(&fr)) {
+          fill_call_trace_given_top(thd, trace, depth, fr);
+        }
+      }
+      break;
+    default: break;
+  }
+JVMPI_END
+
+
+JVMPI_ENTRY(jlong, jvmpi::get_current_thread_cpu_time())
+  return os::current_thread_cpu_time();
+JVMPI_END
+
+
+JVMPI_RAW_ENTRY(JVMPI_RawMonitor, jvmpi::raw_monitor_create(char *lock_name))
+  RawMonitor * monitor = new RawMonitor(lock_name, PROF_RM_MAGIC);
+  return (JVMPI_RawMonitor)monitor;
+JVMPI_RAW_END
+
+
+JVMPI_RAW_ENTRY(void, jvmpi::raw_monitor_enter(JVMPI_RawMonitor lock_id))
+  RawMonitor *monitor = (RawMonitor *)lock_id;
+  if (!(PROF_RM_CHECK(monitor))) {
+      return;
+  }
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: raw_monitor_enter for thread id " INTPTR_FORMAT " lock_id = " INTPTR_FORMAT " ", THREAD, lock_id);
+  }
+  // JVMPI can't do proper transitions on RAW_ENTRY because VM thread event
+  // posting can deadlock.  When vmthread posting is fixed, enable this code.
+  if (THREAD && THREAD->is_Java_thread()) {
+#ifdef PROPER_TRANSITIONS
+    ThreadInVMfromUnknown __tiv;
+    {
+      ThreadBlockInVM __tbivm((JavaThread*)THREAD);
+      monitor->raw_enter(THREAD, true);
+    }
+#else
+
+    /* Transition to thread_blocked without entering vm state          */
+    /* This is really evil. Normally you can't undo _thread_blocked    */
+    /* transitions like this because it would cause us to miss a       */
+    /* safepoint but since the thread was already in _thread_in_native */
+    /* the thread is not leaving a safepoint safe state and it will    */
+    /* block when it tries to return from native. We can't safepoint   */
+    /* block in here because we could deadlock the vmthread. Blech.    */
+
+    JavaThread* jt = (JavaThread*) THREAD;
+    JavaThreadState state = jt->thread_state();
+    assert(state == _thread_in_native, "Must be _thread_in_native");
+    // frame should already be walkable since we are in native
+    assert(!jt->has_last_Java_frame() || jt->frame_anchor()->walkable(), "Must be walkable");
+    jt->set_thread_state(_thread_blocked);
+
+    monitor->raw_enter(THREAD, true);
+
+    // restore state, still at a safepoint safe state
+    jt->set_thread_state(state);
+#endif /* PROPER_TRANSITIONS */
+  } else {
+    monitor->raw_enter(THREAD, true);
+  }
+
+JVMPI_RAW_END
+
+
+JVMPI_RAW_ENTRY(void, jvmpi::raw_monitor_exit(JVMPI_RawMonitor lock_id))
+  RawMonitor *monitor = (RawMonitor *)lock_id;
+  if (!(PROF_RM_CHECK(monitor))) {
+      return;
+  }
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: raw_monitor_exit for thread id " INTPTR_FORMAT " lock_id = " INTPTR_FORMAT " ", THREAD, lock_id);
+  }
+  // JVMPI can't do proper transitions on RAW_ENTRY because VM thread event
+  // posting can deadlock.  When vmthread posting is fixed, enable this code.
+#ifdef PROPER_TRANSITIONS
+  if (THREAD && THREAD->is_Java_thread()) {
+    ThreadInVMfromUnknown __tiv;
+    monitor->raw_exit(THREAD, true);
+  } else {
+    monitor->raw_exit(THREAD, true);
+  }
+#else
+  // Doesn't block so we don't need to do anything special here
+  monitor->raw_exit(THREAD, true);
+#endif /* PROPER_TRANSITIONS */
+
+JVMPI_RAW_END
+
+
+JVMPI_RAW_ENTRY(void, jvmpi::raw_monitor_destroy(JVMPI_RawMonitor lock_id))
+  RawMonitor *monitor = (RawMonitor *)lock_id;
+  if (!(PROF_RM_CHECK(monitor))) {
+      return;
+  }
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: raw_monitor_destroy for thread id " INTPTR_FORMAT " lock_id = " INTPTR_FORMAT " ", THREAD, lock_id);
+  }
+  // JVMPI can't do proper transitions on RAW_ENTRY because VM thread event
+  // posting can deadlock.  When vmthread posting is fixed, enable this code.
+#ifdef PROPER_TRANSITIONS
+  if (THREAD && THREAD->is_Java_thread()) {
+    ThreadInVMfromUnknown __tiv;
+    monitor->raw_exit(THREAD, true);
+    monitor->raw_destroy();
+  } else {
+    monitor->raw_exit(THREAD, true);
+    monitor->raw_destroy();
+  }
+#else
+  // Doesn't block so we don't need to do anything special here
+  monitor->raw_exit(THREAD, true);
+  monitor->raw_destroy();
+#endif /* PROPER_TRANSITIONS */
+
+JVMPI_RAW_END
+
+
+JVMPI_RAW_ENTRY(void, jvmpi::raw_monitor_wait(JVMPI_RawMonitor lock_id, jlong ms))
+  RawMonitor *monitor = (RawMonitor *)lock_id;
+  if (!(PROF_RM_CHECK(monitor))) {
+      return;
+  }
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: raw_monitor_wait for thread id " INTPTR_FORMAT " lock_id = " INTPTR_FORMAT " ", THREAD, lock_id);
+  }
+  // JVMPI can't do proper transitions on RAW_ENTRY because VM thread event
+  // posting can deadlock.  When vmthread posting is fixed, enable this code.
+  if (THREAD && THREAD->is_Java_thread()) {
+#ifdef PROPER_TRANSITIONS
+    ThreadInVMfromUnknown __tiv;
+    {
+      ThreadBlockInVM __tbivm((JavaThread*) THREAD);
+      monitor->raw_wait(ms, true, THREAD);
+    }
+#else
+    /* Transition to thread_blocked without entering vm state          */
+    /* This is really evil. Normally you can't undo _thread_blocked    */
+    /* transitions like this because it would cause us to miss a       */
+    /* safepoint but since the thread was already in _thread_in_native */
+    /* the thread is not leaving a safepoint safe state and it will    */
+    /* block when it tries to return from native. We can't safepoint   */
+    /* block in here because we could deadlock the vmthread. Blech.    */
+
+    JavaThread* jt = (JavaThread*) THREAD;
+    JavaThreadState state = jt->thread_state();
+    assert(state == _thread_in_native, "Must be _thread_in_native");
+    // frame should already be walkable since we are in native
+    assert(!jt->has_last_Java_frame() || jt->frame_anchor()->walkable(), "Must be walkable");
+    jt->set_thread_state(_thread_blocked);
+
+    monitor->raw_wait(ms, true, THREAD);
+    // restore state, still at a safepoint safe state
+    jt->set_thread_state(state);
+
+#endif /* PROPER_TRANSITIONS */
+  } else {
+    monitor->raw_wait(ms, true, THREAD);
+  }
+
+JVMPI_RAW_END
+
+
+JVMPI_RAW_ENTRY(void, jvmpi::raw_monitor_notify_all(JVMPI_RawMonitor lock_id))
+  RawMonitor *monitor = (RawMonitor *)lock_id;
+  if (!(PROF_RM_CHECK(monitor))) {
+      return;
+  }
+  if (TraceJVMPI) {
+    tty->cr();
+    tty->print_cr("JVMPI: raw_monitor_notify_all for thread id " INTPTR_FORMAT " lock_id = " INTPTR_FORMAT " ", THREAD, lock_id);
+  }
+  // JVMPI can't do proper transitions on RAW_ENTRY because event
+  // posting from the VM thread can deadlock. Enable this code once
+  // VM thread posting is fixed.
+#ifdef PROPER_TRANSITIONS
+  if (THREAD && THREAD->is_Java_thread()) {
+    ThreadInVMfromUnknown __tiv;
+    monitor->raw_notifyAll(THREAD);
+  } else {
+    monitor->raw_notifyAll(THREAD);
+  }
+#else
+  // Doesn't block so we don't need to do anything special here
+  monitor->raw_notifyAll(THREAD);
+#endif /* PROPER_TRANSITIONS */
+
+JVMPI_RAW_END
+
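These raw-monitor entry points back the profiler-side RawMonitorCreate/Enter/Exit/Wait/NotifyAll/Destroy functions exported through JVMPI_Interface (declared in jvmpi.h below). A minimal profiler-side sketch, not part of the changeset; the `jvmpi` pointer and helper names are assumptions:

    /* Illustrative sketch (not from the changeset): typical agent-side use of
     * the raw monitor entry points, through the JVMPI_Interface table.
     * `jvmpi` is assumed to be the interface pointer obtained in JVM_OnLoad. */
    #include "jvmpi.h"

    extern JVMPI_Interface *jvmpi;
    static JVMPI_RawMonitor data_lock;

    static void agent_init(void) {
      data_lock = jvmpi->RawMonitorCreate((char *) "agent data lock");
    }

    static void agent_wait_for_data(void) {
      jvmpi->RawMonitorEnter(data_lock);
      jvmpi->RawMonitorWait(data_lock, 1000);   /* wait up to one second */
      jvmpi->RawMonitorExit(data_lock);
    }

    static void agent_publish_data(void) {
      jvmpi->RawMonitorEnter(data_lock);
      jvmpi->RawMonitorNotifyAll(data_lock);
      jvmpi->RawMonitorExit(data_lock);
    }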
+// Use shared java_suspend.
+JVMPI_ENTRY(void, jvmpi::suspend_thread(JNIEnv *env))
+  if (env == NULL) return;
+  JavaThread *java_thread = JavaThread::thread_from_jni_environment(env);
+  if (java_thread == NULL) return;
+  // the thread has not yet run or has exited (not on threads list)
+  if (java_thread->threadObj() == NULL) return;
+  if (java_lang_Thread::thread(java_thread->threadObj()) == NULL) return;
+
+  // don't allow hidden thread suspend request.
+  if (java_thread->is_hidden_from_external_view()) {
+    return;
+  }
+
+  // Don't allow self-suspension, hprof agent expects to keep
+  // running so as to process resumes of all threads.
+  if (Thread::current() == (Thread *)java_thread) {
+    return;
+  }
+
+  {
+    MutexLockerEx ml(java_thread->SR_lock(), Mutex::_no_safepoint_check_flag);
+    if (java_thread->is_external_suspend()) {
+      // Don't allow nested external suspend requests. We can't return
+      // an error from this interface so just ignore the problem.
+      return;
+    }
+    if (java_thread->is_exiting()) { // thread is in the process of exiting
+      return;
+    }
+    java_thread->set_external_suspend();
+  }
+
+  //
+  // If a thread in state _thread_in_native is not immediately
+  // suspended, then a blocked RawMonitorEnter() call may enter
+  // the RawMonitor even if RawMonitorExit() is called after
+  // SuspendThread() returns. java_suspend() will catch threads
+  // in the process of exiting and will ignore them.
+  //
+  java_thread->java_suspend();
+
+  // It would be nice to have the following assertion in all the time,
+  // but it is possible for a racing resume request to have resumed
+  // this thread right after we suspended it. Temporarily enable this
+  // assertion if you are chasing a different kind of bug.
+  //
+  // assert(java_lang_Thread::thread(java_thread->threadObj()) == NULL ||
+  //   java_thread->is_being_ext_suspended(), "thread is not suspended");
+JVMPI_END
+
+// Use shared java_suspend.
+JVMPI_ENTRY(void, jvmpi::suspend_thread_list(jint reqCnt, JNIEnv **reqList, jint *results))
+
+  if (reqCnt <= 0 || reqList == NULL || results == NULL) {
+    // parameter problem so bail out
+    return;
+  }
+
+  int needSafepoint = 0;  // > 0 if we need a safepoint
+
+  for (int i = 0; i < reqCnt; i++) {
+    if (reqList[i] == NULL) {
+      results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+      continue;
+    }
+    JavaThread *java_thread = JavaThread::thread_from_jni_environment(reqList[i]);
+    if (java_thread == NULL) {
+      results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+      continue;
+    }
+    // the thread has not yet run or has exited (not on threads list)
+    if (java_thread->threadObj() == NULL) {
+      results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+      continue;
+    }
+    if (java_lang_Thread::thread(java_thread->threadObj()) == NULL) {
+      results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+      continue;
+    }
+    // don't allow hidden thread suspend request.
+    if (java_thread->is_hidden_from_external_view()) {
+      results[i] = 0;  // indicate successful suspend
+      continue;
+    }
+
+    // Don't allow self-suspension, hprof agent expects to keep
+    // running so as to process resumes of all threads.
+    if (Thread::current() == (Thread *)java_thread) {
+      results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+      continue;
+    }
+
+    {
+      MutexLockerEx ml(java_thread->SR_lock(), Mutex::_no_safepoint_check_flag);
+      if (java_thread->is_external_suspend()) {
+        // Don't allow nested external suspend requests. We can't return
+        // an error from this interface so just ignore the problem.
+        results[i] = 14; // same as JVMDI_ERROR_THREAD_SUSPENDED
+        continue;
+      }
+      if (java_thread->is_exiting()) { // thread is in the process of exiting
+        results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+        continue;
+      }
+      java_thread->set_external_suspend();
+    }
+
+    if (java_thread->thread_state() == _thread_in_native) {
+      // We need to try and suspend native threads here. Threads in
+      // other states will self-suspend on their next transition.
+      // java_suspend() will catch threads in the process of exiting
+      // and will ignore them.
+      java_thread->java_suspend();
+    } else {
+      needSafepoint++;
+    }
+
+    results[i] = 0;  // indicate successful suspend
+  }
+
+  if (needSafepoint > 0) {
+    VM_ForceSafepoint vfs;
+    VMThread::execute(&vfs);
+  }
+JVMPI_END
+
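suspend_thread_list() fills `results` with 0 on success and JVMDI-style error codes otherwise, and only forces a safepoint when some target was not already in native code. A hedged agent-side sketch of the matching SuspendThreadList/ResumeThreadList interface functions; the `jvmpi` pointer and the way the JNIEnv* array is collected are assumptions:

    /* Illustrative sketch (not from the changeset): suspending and resuming a
     * set of threads. The JNIEnv* array would normally be recorded from
     * THREAD_START events (thread_env_id). */
    #include <stdlib.h>
    #include "jvmpi.h"

    extern JVMPI_Interface *jvmpi;

    static void suspend_then_resume(JNIEnv **envs, jint count) {
      jint *results = (jint *) malloc(count * sizeof(jint));
      if (results == NULL) return;
      jvmpi->SuspendThreadList(count, envs, results);
      /* results[i] == 0 on success; 10/14 mirror the JVMDI invalid-thread and
       * already-suspended codes used by the implementation above. */
      jvmpi->ResumeThreadList(count, envs, results);
      free(results);
    }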
+// Use shared java_resume. Requires owning the Threads lock.
+JVMPI_ENTRY(void, jvmpi::resume_thread(JNIEnv *env))
+  JavaThread *java_thread;
+  if ((env) && (java_thread = JavaThread::thread_from_jni_environment(env))) {
+    MutexLocker ml(Threads_lock);
+
+    // don't allow hidden thread resume request.
+    if (java_thread->is_hidden_from_external_view()) {
+      return;
+    }
+
+    java_thread->java_resume();
+  }
+JVMPI_END
+
+// Use shared java_resume. Requires owning the Threads lock.
+JVMPI_ENTRY(void, jvmpi::resume_thread_list(jint reqCnt, JNIEnv **reqList, jint *results))
+
+  if (reqCnt <= 0 || reqList == NULL || results == NULL) {
+    // parameter problem so bail out
+    return;
+  }
+
+  for (int i = 0; i < reqCnt; i++) {
+    if (reqList[i] == NULL) {
+      results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+      continue;
+    }
+    JavaThread *java_thread = JavaThread::thread_from_jni_environment(reqList[i]);
+    if (java_thread == NULL) {
+      results[i] = 10; // same as JVMDI_ERROR_INVALID_THREAD
+      continue;
+    }
+    // don't allow hidden thread resume request.
+    if (java_thread->is_hidden_from_external_view()) {
+      results[i] = 0;  // indicate successful resume
+      continue;
+    }
+
+    {
+      MutexLocker ml(Threads_lock);
+      java_thread->java_resume();
+    }
+
+    results[i] = 0;  // indicate successful resume
+  }
+JVMPI_END
+
+// 2.0: redesign to match jvmdi. handle errors and more states
+JVMPI_ENTRY(jint, jvmpi::get_thread_status(JNIEnv *env))
+  jint res = JVMPI_THREAD_RUNNABLE;
+  JavaThread *tp;
+  { MutexLocker mu(Threads_lock);
+    if ((env) && (tp = JavaThread::thread_from_jni_environment(env))) {
+      JavaThreadState state;
+      ThreadState t_state;
+      if ((state = tp->thread_state()) && (tp->osthread()) && (t_state = tp->osthread()->get_state())) {
+
+          if (state == _thread_blocked || state == _thread_blocked_trans) {
+              switch (t_state) {
+                  case CONDVAR_WAIT:   // fall through
+                  case OBJECT_WAIT:
+                      res = JVMPI_THREAD_CONDVAR_WAIT;
+                      break;
+                  case MONITOR_WAIT:
+                      res = JVMPI_THREAD_MONITOR_WAIT;
+                      break;
+                  case SLEEPING:       // fall through
+                  case ZOMBIE:         // fall through
+                  case RUNNABLE:
+                      res = JVMPI_THREAD_RUNNABLE;
+                      break;
+                  default:
+                      break;
+              }
+          }
+          if (tp->is_being_ext_suspended()) {
+              // internal suspend doesn't count for this flag
+              res = res | JVMPI_THREAD_SUSPENDED;
+          }
+          if (tp->osthread()->interrupted()) {
+              res = res | JVMPI_THREAD_INTERRUPTED;
+          }
+      }
+    }
+  } // release Threads_lock
+  return res;
+JVMPI_END
+
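get_thread_status() returns one of the JVMPI_THREAD_* base states with JVMPI_THREAD_SUSPENDED and JVMPI_THREAD_INTERRUPTED OR'ed in as flag bits, so callers have to mask before comparing. A small decoding sketch, not part of the changeset (constants are from jvmpi.h below):

    /* Illustrative sketch: decoding a GetThreadStatus result. The low bits
     * carry the base state; 0x8000/0x4000 are flag bits. */
    #include <stdio.h>
    #include "jvmpi.h"

    static void describe_status(jint status) {
      jint base = status & ~(JVMPI_THREAD_SUSPENDED | JVMPI_THREAD_INTERRUPTED);
      const char *what = (base == JVMPI_THREAD_MONITOR_WAIT) ? "monitor wait"
                       : (base == JVMPI_THREAD_CONDVAR_WAIT) ? "condvar wait"
                       : "runnable";
      printf("%s%s%s\n", what,
             (status & JVMPI_THREAD_SUSPENDED)   ? ", suspended"   : "",
             (status & JVMPI_THREAD_INTERRUPTED) ? ", interrupted" : "");
    }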
+
+// There is no provision in VM to check that; assume yes
+// Do NOT call thread_is_running - this calls thr_getstate
+// which only works if you have called thr_suspend.
+JVMPI_ENTRY(jboolean, jvmpi::thread_has_run(JNIEnv *env))
+  JavaThread* java_thread;
+  if ((env) && (java_thread = JavaThread::thread_from_jni_environment(env)))  {
+    return JNI_TRUE;
+  } else {
+    return JNI_FALSE;
+  }
+JVMPI_END
+
+
+JVMPI_ENTRY(void, jvmpi::run_gc())
+  Universe::heap()->collect(GCCause::_java_lang_system_gc);
+JVMPI_END
+
+
+JVMPI_ENTRY(void, jvmpi::profiler_exit(jint exit_code))
+  vm_exit(exit_code /*user_exit == true*/); 
+  ShouldNotReachHere();
+JVMPI_END
+
+
+static void jvmpi_daemon_thread_entry(JavaThread* thread, TRAPS) {
+  assert(thread->is_jvmpi_daemon_thread(), "wrong thread");
+  JVMPIDaemonThread* daemon_thread = (JVMPIDaemonThread*)thread;
+
+  // ThreadToNativeFromVM takes care of changing thread_state, so safepoint code knows that
+  // we have left the VM
+  { JavaThread* thread = (JavaThread*) THREAD;
+    ThreadToNativeFromVM ttn(thread);
+    HandleMark hm(thread);
+
+    daemon_thread->function()(NULL);
+  }
+}
+
+
+JVMPI_ENTRY(jint, jvmpi::create_system_thread(char *name, jint priority, JVMPIDaemonFunction f))
+  const int invalid_res = JNI_ERR;
+  klassOop k = SystemDictionary::resolve_or_fail(vmSymbolHandles::java_lang_Thread(), true, CHECK_(invalid_res));
+  instanceKlassHandle klass (THREAD, k);
+  instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_(invalid_res));
+  Handle string = java_lang_String::create_from_str(name, CHECK_(invalid_res));    
+
+  // Initialize thread_oop to put it into the system threadGroup    
+  Handle thread_group (THREAD, Universe::system_thread_group());
+  JavaValue result(T_VOID);
+  JavaCalls::call_special(&result, thread_oop, 
+                         klass, 
+                         vmSymbolHandles::object_initializer_name(), 
+                         vmSymbolHandles::threadgroup_string_void_signature(), 
+                         thread_group, 
+                         string, 
+                         CHECK_(invalid_res));  
+  
+  { MutexLocker mu(Threads_lock);
+    JVMPIDaemonThread* daemon_thread = new JVMPIDaemonThread(&jvmpi_daemon_thread_entry, f);
+
+    // At this point it is possible that no osthread was created for the
+    // JavaThread due to lack of memory.
+    if (daemon_thread == NULL || daemon_thread->osthread() == NULL) {
+      if (daemon_thread) delete daemon_thread;
+      return JNI_ERR;
+    }
+   
+    ThreadPriority thread_priority = NoPriority;
+    switch (priority) {
+      case JVMPI_MINIMUM_PRIORITY: thread_priority = MinPriority ; break;
+      case JVMPI_MAXIMUM_PRIORITY: thread_priority = MaxPriority ; break;
+      case JVMPI_NORMAL_PRIORITY : thread_priority = NormPriority; break;
+      default: ShouldNotReachHere();
+    }
+
+    java_lang_Thread::set_thread(thread_oop(), daemon_thread);      
+    java_lang_Thread::set_priority(thread_oop(), thread_priority);
+    java_lang_Thread::set_daemon(thread_oop());
+
+    daemon_thread->set_threadObj(thread_oop());
+    Threads::add(daemon_thread);  
+    Thread::start(daemon_thread);
+
+  } // Release Threads_lock before calling up to agent code
+  // post_thread_start_event called from "run"
+  
+  return JNI_OK;
+JVMPI_END
+
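create_system_thread() is the backing for the CreateSystemThread interface function. A hedged sketch of how a profiler would use it after the JVMPI_EVENT_JVM_INIT_DONE notification; the `jvmpi` pointer and function names are assumptions:

    /* Illustrative sketch (not from the changeset): starting a VM-managed
     * daemon thread for background profiling work. */
    #include "jvmpi.h"

    extern JVMPI_Interface *jvmpi;

    static void sampler_loop(void *arg) {
      /* periodic profiling work; `arg` is always NULL
       * (see jvmpi_daemon_thread_entry above) */
    }

    static void start_sampler(void) {
      if (jvmpi->CreateSystemThread((char *) "JVMPI sampler",
                                    JVMPI_NORMAL_PRIORITY,
                                    sampler_loop) != JNI_OK) {
        /* thread could not be created (e.g. no osthread) */
      }
    }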
+
+JVMPI_ENTRY(jint, jvmpi::request_event(jint event_type, void *arg))
+  switch (event_type) {
+    case JVMPI_EVENT_OBJECT_ALLOC:
+      post_object_alloc_event((oop)arg, ((oop)arg)->size() * HeapWordSize,
+			      Universe::heap()->addr_to_arena_id(arg),
+			      JVMPI_REQUESTED_EVENT);
+      return JVMPI_SUCCESS;
+    case JVMPI_EVENT_THREAD_START:
+      post_thread_start_event(java_lang_Thread::thread((oop)arg), JVMPI_REQUESTED_EVENT);
+      return JVMPI_SUCCESS;
+    case JVMPI_EVENT_CLASS_LOAD:
+      post_class_load_event((oop)arg, JVMPI_REQUESTED_EVENT);
+      return JVMPI_SUCCESS;
+    case JVMPI_EVENT_OBJECT_DUMP:
+      post_object_dump_event((oop)arg, JVMPI_REQUESTED_EVENT);
+      return JVMPI_SUCCESS;
+    case JVMPI_EVENT_HEAP_DUMP: {
+      int heap_dump_level;
+
+      if (arg == NULL) {
+        heap_dump_level = JVMPI_DUMP_LEVEL_2;
+      } else {
+        heap_dump_level = ((JVMPI_HeapDumpArg*)arg)->heap_dump_level;
+      }
+
+      post_heap_dump_event_in_safepoint(heap_dump_level, JVMPI_REQUESTED_EVENT);
+      return JVMPI_SUCCESS;
+    }
+    case JVMPI_EVENT_MONITOR_DUMP:
+      post_monitor_dump_event_in_safepoint(JVMPI_REQUESTED_EVENT);
+      return JVMPI_SUCCESS;
+    default:
+      return JVMPI_NOT_AVAILABLE;
+  }
+JVMPI_END
+
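request_event() backs the RequestEvent interface function; for JVMPI_EVENT_HEAP_DUMP the optional argument selects the dump level, with a NULL argument defaulting to level 2. A profiler-side sketch, not part of the changeset:

    /* Illustrative sketch: requesting a level-1 heap dump. The dump itself is
     * delivered later through NotifyEvent as a JVMPI_EVENT_HEAP_DUMP, tagged
     * with JVMPI_REQUESTED_EVENT. */
    #include "jvmpi.h"

    extern JVMPI_Interface *jvmpi;

    static void request_heap_dump(void) {
      JVMPI_HeapDumpArg arg;
      arg.heap_dump_level = JVMPI_DUMP_LEVEL_1;   /* NULL arg would mean level 2 */
      if (jvmpi->RequestEvent(JVMPI_EVENT_HEAP_DUMP, &arg) != JVMPI_SUCCESS) {
        /* JVMPI_NOT_AVAILABLE: event type not supported via RequestEvent */
      }
    }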
+
+// Using JVMPI_RAW_ENTRY() to allow this API to be called from a
+// SIGPROF signal handler. ThreadInVMfromUnknown's use of a
+// HandleMarkCleaner will clean up unexpected Handles when called
+// from a signal handler.
+JVMPI_RAW_ENTRY(void, jvmpi::set_thread_local_storage(JNIEnv *env, void *ptr))
+  if (env != NULL) {
+    JavaThread* jt = JavaThread::thread_from_jni_environment(env);
+    if (jt != NULL) {
+      jt->set_jvmpi_data(ptr);
+    }
+  }
+JVMPI_END
+
+
+// See set_thread_local_storage comment above.
+JVMPI_RAW_ENTRY(void*, jvmpi::get_thread_local_storage(JNIEnv *env))
+  if (env == NULL) return NULL;
+  JavaThread* jt = JavaThread::thread_from_jni_environment(env);
+  if (jt == NULL) return NULL;
+  return jt->jvmpi_data();
+JVMPI_END
+
+
+JVMPI_ENTRY(jobjectID, jvmpi::get_thread_object(JNIEnv *env))
+  if (env == NULL) return NULL;
+  return (jobjectID) JavaThread::thread_from_jni_environment(env)->threadObj();
+JVMPI_END
+
+
+JVMPI_ENTRY(jobjectID, jvmpi::get_method_class(jmethodID mid))
+  return (jobjectID) Klass::cast(JNIHandles::resolve_jmethod_id(mid)->method_holder())->java_mirror();
+JVMPI_END
+
+
+JVMPI_ENTRY(jobject, jvmpi::jobjectID_2_jobject(jobjectID jid))
+  assert(GC_locker::is_active(), "jobjectID_2_jobject may be called only with disabled GC");
+  Thread* thd = Thread::current();
+  assert(thd->is_Java_thread(), "call to jobjectID_2_jobject can only happen in a Java thread");
+
+  JavaThread* jthread = (JavaThread*)thd;
+
+  JNIEnv* env = jthread->jni_environment();
+  return JNIHandles::make_local(env, (oop)jid);
+JVMPI_END
+
+
+JVMPI_ENTRY(jobjectID, jvmpi::jobject_2_jobjectID(jobject jobj))
+  assert(GC_locker::is_active(), "jobject_2_jobjectID may be called only with disabled GC");
+  return (jobjectID)JNIHandles::resolve(jobj);
+JVMPI_END
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/prims/jvmpi.h	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,645 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)jvmpi.h	1.21 05/11/18 15:23:06 JVM"
+#endif
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+ */
+
+#ifndef _JAVASOFT_JVMPI_H_
+#define _JAVASOFT_JVMPI_H_
+
+#include "jni.h"
+
+#define JVMPI_VERSION_1   ((jint)0x10000001)  /* implied 0 for minor version */
+#define JVMPI_VERSION_1_1 ((jint)0x10000002)
+#define JVMPI_VERSION_1_2 ((jint)0x10000003)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+  typedef void (*jvmpi_void_function_of_void)(void *);
+#ifdef __cplusplus
+}
+#endif
+
+/****************************************************************
+ * Profiler interface data structures.
+ ****************************************************************/
+/* identifier types. */
+struct _jobjectID;
+typedef struct _jobjectID * jobjectID;       /* type of object ids */ 
+
+/* raw monitors */
+struct _JVMPI_RawMonitor;
+typedef struct _JVMPI_RawMonitor * JVMPI_RawMonitor;
+
+/* call frame */
+typedef struct {
+    jint lineno;                      /* line number in the source file */
+    jmethodID method_id;              /* method executed in this frame */
+} JVMPI_CallFrame;
+
+/* call trace */
+typedef struct {
+    JNIEnv *env_id;                   /* Env where trace was recorded */
+    jint num_frames;                  /* number of frames in this trace */
+    JVMPI_CallFrame *frames;          /* frames */
+} JVMPI_CallTrace;
+
+/* method */
+typedef struct {
+    char *method_name;                /* name of method */
+    char *method_signature;           /* signature of method */
+    jint start_lineno;                /* -1 if native, abstract .. */
+    jint end_lineno;                  /* -1 if native, abstract .. */
+    jmethodID method_id;              /* id assigned to this method */
+} JVMPI_Method;
+
+/* Field */
+typedef struct {
+    char *field_name;                 /* name of field */
+    char *field_signature;            /* signature of field */
+} JVMPI_Field;
+
+/* line number info for a compiled method */
+typedef struct {
+    jint offset;                      /* offset from beginning of method */
+    jint lineno;                      /* lineno from beginning of src file */
+} JVMPI_Lineno;
+
+/* event */
+typedef struct {
+    jint event_type;                  /* event_type */
+    JNIEnv *env_id;                   /* env where this event occurred */
+  
+    union {
+        struct {
+	    const char *class_name;   /* class name */
+	    char *source_name;        /* name of source file */
+	    jint num_interfaces;      /* number of interfaces implemented */
+  	    jint num_methods;         /* number of methods in the class */
+	    JVMPI_Method *methods;    /* methods */
+	    jint num_static_fields;   /* number of static fields */
+	    JVMPI_Field *statics;     /* static fields */
+	    jint num_instance_fields; /* number of instance fields */
+	    JVMPI_Field *instances;   /* instance fields */
+	    jobjectID class_id;       /* id of the class object */
+	} class_load;
+
+        struct {
+	    jobjectID class_id;       /* id of the class object */
+	} class_unload;
+
+        struct {
+	    unsigned char *class_data;        /* content of class file */
+	    jint class_data_len;              /* class file length */
+	    unsigned char *new_class_data;    /* instrumented class file */
+	    jint new_class_data_len;          /* new class file length */
+	    void * (*malloc_f)(unsigned int); /* memory allocation function */
+	} class_load_hook;
+
+        struct {
+            jint arena_id;
+	    jobjectID class_id;       /* id of object class */
+	    jint is_array;            /* JVMPI_NORMAL_OBJECT, ... */
+	    jint size;                /* size in number of bytes */
+	    jobjectID obj_id;         /* id assigned to this object */
+        } obj_alloc;
+
+        struct {
+	    jobjectID obj_id;         /* id of the object */
+	} obj_free;
+
+        struct {
+	    jint arena_id;            /* cur arena id */
+	    jobjectID obj_id;         /* cur object id */
+	    jint new_arena_id;        /* new arena id */
+	    jobjectID new_obj_id;     /* new object id */	  
+	} obj_move;
+
+        struct {
+	    jint arena_id;            /* id of arena */
+	    const char *arena_name;   /* name of arena */
+	} new_arena;
+
+        struct {
+	    jint arena_id;            /* id of arena */
+	} delete_arena;
+
+        struct {
+	    char *thread_name;        /* name of thread */
+	    char *group_name;         /* name of group */
+	    char *parent_name;        /* name of parent */
+	    jobjectID thread_id;      /* id of the thread object */
+	    JNIEnv *thread_env_id;
+        } thread_start;
+
+        struct {
+	    int dump_level;           /* level of the heap dump info */
+	    char *begin;              /* where all the root records begin,
+					 please see the heap dump buffer 
+				         format described below */
+	    char *end;                /* where the object records end. */
+	    jint num_traces;          /* number of thread traces, 
+				         0 if dump level = JVMPI_DUMP_LEVEL_0 */
+	    JVMPI_CallTrace *traces;  /* thread traces collected during 
+					 heap dump */ 
+	} heap_dump;
+
+        struct {
+	    jobjectID obj_id;         /* object id */
+	    jobject ref_id;           /* id assigned to the globalref */
+	} jni_globalref_alloc;
+      
+        struct {
+	    jobject ref_id;           /* id of the global ref */
+	} jni_globalref_free;
+
+        struct {
+	    jmethodID method_id;      /* method */
+	} method;
+
+        struct {
+	    jmethodID method_id;      /* id of method */
+	    jobjectID obj_id;         /* id of target object */
+	} method_entry2;
+
+        struct {
+	    jmethodID method_id;        /* id of compiled method */
+	    void *code_addr;            /* code start addr. in memory */
+	    jint code_size;             /* code size */
+	    jint lineno_table_size;     /* size of lineno table */
+	    JVMPI_Lineno *lineno_table; /* lineno info */
+	} compiled_method_load;
+        
+        struct {
+	    jmethodID method_id;        /* id of unloaded compiled method */
+	} compiled_method_unload;
+
+        struct {
+            jmethodID method_id;  /* id of the method the instruction belongs to */
+            jint      offset;	  /* instruction offset in the method's bytecode */
+            union {
+	        struct {
+	            jboolean is_true; /* whether true or false branch is taken  */
+	        } if_info;
+	        struct {
+	            jint key;     /* top stack value used as an index */ 
+	            jint low;     /* min value of the index           */	
+	            jint hi;      /* max value of the index           */	
+	        } tableswitch_info;
+	        struct {
+		    jint chosen_pair_index; /* actually chosen pair index (0-based)
+                                             * if chosen_pair_index == pairs_total then
+                                             * the 'default' branch is taken
+                                             */
+		    jint pairs_total;       /* total number of lookupswitch pairs */
+	        } lookupswitch_info;
+            } u;
+        } instruction;
+
+        struct {
+	    char *begin;                /* beginning of dump buffer, 
+					   see below for format */
+	    char *end;                  /* end of dump buffer */
+	    jint num_traces;            /* number of traces */
+	    JVMPI_CallTrace *traces;    /* traces of all threads */
+	    jint *threads_status;       /* status of all threads */
+	} monitor_dump;
+
+        struct {
+	    const char *name;           /* name of raw monitor */
+	    JVMPI_RawMonitor id;        /* id */
+	} raw_monitor;
+
+        struct {
+	    jobjectID object;           /* Java object */
+	} monitor;
+
+        struct {
+	    jobjectID object;           /* Java object */
+	    jlong timeout;              /* timeout period */
+	} monitor_wait;
+
+        struct {
+	    jlong used_objects;
+	    jlong used_object_space;
+	    jlong total_object_space;
+	} gc_info;
+
+        struct {
+	    jint data_len;
+	    char *data;
+	} object_dump;
+
+    } u;
+} JVMPI_Event;
+
+/* interface functions */
+typedef struct {
+    jint version;   /* JVMPI version */
+    
+    /* ------interface implemented by the profiler------ */
+
+    /**
+     * Function called by the JVM to notify the profiler of an event.
+     */
+    void (*NotifyEvent)(JVMPI_Event *event);
+  
+    /* ------interface implemented by the JVM------ */
+    
+    /**
+     * Function called by the profiler to enable/disable/send notification 
+     * for a particular event type.  
+     * 
+     * event_type - event_type
+     * arg - event specific arg
+     *
+     * return JVMPI_NOT_AVAILABLE, JVMPI_SUCCESS or JVMPI_FAIL
+     */
+    jint (*EnableEvent)(jint event_type, void *arg);
+    jint (*DisableEvent)(jint event_type, void *arg);
+    jint (*RequestEvent)(jint event_type, void *arg);
+  
+    /**
+     * Function called by the profiler to get a stack
+     * trace from the JVM.
+     *
+     * trace - trace data structure to be filled
+     * depth - maximum depth of the trace.
+     */
+    void (*GetCallTrace)(JVMPI_CallTrace *trace, jint depth);
+
+    /**
+     * Function called by profiler when it wants to exit/stop.
+     */
+    void (*ProfilerExit)(jint);
+
+    /**
+     * Utility functions provided by the JVM.
+     */
+    JVMPI_RawMonitor (*RawMonitorCreate)(char *lock_name);
+    void (*RawMonitorEnter)(JVMPI_RawMonitor lock_id);
+    void (*RawMonitorExit)(JVMPI_RawMonitor lock_id);
+    void (*RawMonitorWait)(JVMPI_RawMonitor lock_id, jlong ms);
+    void (*RawMonitorNotifyAll)(JVMPI_RawMonitor lock_id);
+    void (*RawMonitorDestroy)(JVMPI_RawMonitor lock_id);
+
+    /**
+     * Function called by the profiler to get the current thread's CPU time.
+     *
+     * return time in nanoseconds;
+     */
+    jlong (*GetCurrentThreadCpuTime)(void);
+
+    void (*SuspendThread)(JNIEnv *env);
+    void (*ResumeThread)(JNIEnv *env);
+    jint (*GetThreadStatus)(JNIEnv *env);
+    jboolean (*ThreadHasRun)(JNIEnv *env);
+
+    /* This function can be called safely only after the JVMPI_EVENT_JVM_INIT_DONE
+       notification by the JVM. */
+    jint (*CreateSystemThread)(char *name, jint priority, jvmpi_void_function_of_void f);
+
+    /* thread local storage access functions to avoid locking in time 
+       critical functions */
+    void (*SetThreadLocalStorage)(JNIEnv *env_id, void *ptr);
+    void * (*GetThreadLocalStorage)(JNIEnv *env_id);
+
+    /* control GC */
+    void (*DisableGC)(void);
+    void (*EnableGC)(void);
+    void (*RunGC)(void);
+
+    jobjectID (*GetThreadObject)(JNIEnv *env);
+    jobjectID (*GetMethodClass)(jmethodID mid);
+
+    /* JNI <-> jobject conversions */
+    jobject   (*jobjectID2jobject)(jobjectID jid);
+    jobjectID (*jobject2jobjectID)(jobject jobj);
+
+    void (*SuspendThreadList)
+      (jint reqCount, JNIEnv **reqList, jint *results);
+    void (*ResumeThreadList)
+      (jint reqCount, JNIEnv **reqList, jint *results);
+} JVMPI_Interface;
+
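A profiler obtains this table once at startup and then calls through it. A minimal, hedged bootstrap and GetCallTrace sketch, not part of the changeset; the frame depth of 32 and the way the target thread's JNIEnv is obtained are illustrative:

    /* Illustrative sketch: agent bootstrap and a stack sample through
     * GetCallTrace. Error handling is mostly elided. */
    #include "jni.h"
    #include "jvmpi.h"

    static JVMPI_Interface *jvmpi;

    static void my_notify_event(JVMPI_Event *event) {
      /* dispatch on event->event_type here */
    }

    extern "C" JNIEXPORT jint JNICALL
    JVM_OnLoad(JavaVM *jvm, char *options, void *reserved) {
      if (jvm->GetEnv((void **) &jvmpi, JVMPI_VERSION_1) != JNI_OK) return JNI_ERR;
      jvmpi->NotifyEvent = my_notify_event;     /* profiler-side callback */
      return JNI_OK;
    }

    static void sample_thread(JNIEnv *env_of_target_thread) {
      JVMPI_CallFrame frames[32];
      JVMPI_CallTrace trace;
      trace.env_id = env_of_target_thread;
      trace.frames = frames;
      jvmpi->GetCallTrace(&trace, 32);          /* fills num_frames and frames[] */
    }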
+/* type of argument passed to RequestEvent for heap dumps */
+typedef struct {
+    jint heap_dump_level;
+} JVMPI_HeapDumpArg;
+
+/**********************************************************************
+ * Constants and formats used in JVM Profiler Interface.
+ **********************************************************************/
+/*
+ * Event type constants.
+ */
+
+#define JVMPI_EVENT_METHOD_ENTRY                  ((jint) 1) 
+#define JVMPI_EVENT_METHOD_ENTRY2                 ((jint) 2) 
+#define JVMPI_EVENT_METHOD_EXIT                   ((jint) 3) 
+
+#define JVMPI_EVENT_OBJECT_ALLOC                  ((jint) 4) 
+#define JVMPI_EVENT_OBJECT_FREE                   ((jint) 5) 
+#define JVMPI_EVENT_OBJECT_MOVE                   ((jint) 6) 
+
+#define JVMPI_EVENT_COMPILED_METHOD_LOAD          ((jint) 7) 
+#define JVMPI_EVENT_COMPILED_METHOD_UNLOAD        ((jint) 8) 
+
+#define JVMPI_EVENT_INSTRUCTION_START             ((jint) 9)
+
+#define JVMPI_EVENT_THREAD_START                  ((jint)33) 
+#define JVMPI_EVENT_THREAD_END                    ((jint)34) 
+
+#define JVMPI_EVENT_CLASS_LOAD_HOOK               ((jint)35) 
+
+#define JVMPI_EVENT_HEAP_DUMP                     ((jint)37) 
+#define JVMPI_EVENT_JNI_GLOBALREF_ALLOC           ((jint)38) 
+#define JVMPI_EVENT_JNI_GLOBALREF_FREE            ((jint)39) 
+#define JVMPI_EVENT_JNI_WEAK_GLOBALREF_ALLOC      ((jint)40) 
+#define JVMPI_EVENT_JNI_WEAK_GLOBALREF_FREE       ((jint)41) 
+#define JVMPI_EVENT_CLASS_LOAD                    ((jint)42) 
+#define JVMPI_EVENT_CLASS_UNLOAD                  ((jint)43) 
+#define JVMPI_EVENT_DATA_DUMP_REQUEST             ((jint)44) 
+#define JVMPI_EVENT_DATA_RESET_REQUEST            ((jint)45) 
+
+#define JVMPI_EVENT_JVM_INIT_DONE                 ((jint)46) 
+#define JVMPI_EVENT_JVM_SHUT_DOWN                 ((jint)47) 
+
+#define JVMPI_EVENT_ARENA_NEW                     ((jint)48)
+#define JVMPI_EVENT_ARENA_DELETE                  ((jint)49)
+
+#define JVMPI_EVENT_OBJECT_DUMP                   ((jint)50)
+
+#define JVMPI_EVENT_RAW_MONITOR_CONTENDED_ENTER   ((jint)51)
+#define JVMPI_EVENT_RAW_MONITOR_CONTENDED_ENTERED ((jint)52)
+#define JVMPI_EVENT_RAW_MONITOR_CONTENDED_EXIT    ((jint)53)
+#define JVMPI_EVENT_MONITOR_CONTENDED_ENTER       ((jint)54)
+#define JVMPI_EVENT_MONITOR_CONTENDED_ENTERED     ((jint)55)
+#define JVMPI_EVENT_MONITOR_CONTENDED_EXIT        ((jint)56)
+#define JVMPI_EVENT_MONITOR_WAIT                  ((jint)57)
+#define JVMPI_EVENT_MONITOR_WAITED                ((jint)58)
+#define JVMPI_EVENT_MONITOR_DUMP                  ((jint)59)
+
+#define JVMPI_EVENT_GC_START                      ((jint)60)
+#define JVMPI_EVENT_GC_FINISH                     ((jint)61)
+
+#define JVMPI_MAX_EVENT_TYPE_VAL                  ((jint)61)
+
+/* old definitions, to be removed */
+#define JVMPI_EVENT_LOAD_COMPILED_METHOD          ((jint) 7) 
+#define JVMPI_EVENT_UNLOAD_COMPILED_METHOD        ((jint) 8) 
+#define JVMPI_EVENT_NEW_ARENA                     ((jint)48)
+#define JVMPI_EVENT_DELETE_ARENA                  ((jint)49)
+#define JVMPI_EVENT_DUMP_DATA_REQUEST             ((jint)44) 
+#define JVMPI_EVENT_RESET_DATA_REQUEST            ((jint)45) 
+#define JVMPI_EVENT_OBJ_ALLOC                     ((jint) 4) 
+#define JVMPI_EVENT_OBJ_FREE                      ((jint) 5) 
+#define JVMPI_EVENT_OBJ_MOVE                      ((jint) 6) 
+
+#define JVMPI_REQUESTED_EVENT                     ((jint)0x10000000)
+
+
+ 
+/* 
+ * enabling/disabling event notification.
+ */
+/* results */
+#define JVMPI_SUCCESS                    ((jint)0)
+#define JVMPI_NOT_AVAILABLE              ((jint)1)
+#define JVMPI_FAIL                       ((jint)-1)
+
+/*
+ * Thread status
+ */
+enum {
+    JVMPI_THREAD_RUNNABLE = 1,
+    JVMPI_THREAD_MONITOR_WAIT,
+    JVMPI_THREAD_CONDVAR_WAIT
+};
+
+#define JVMPI_THREAD_SUSPENDED      0x8000
+#define JVMPI_THREAD_INTERRUPTED    0x4000
+
+/*
+ * Thread priority
+ */
+#define JVMPI_MINIMUM_PRIORITY	    1
+#define JVMPI_MAXIMUM_PRIORITY	    10
+#define JVMPI_NORMAL_PRIORITY	    5
+
+/*
+ * Object type constants.
+ */
+#define JVMPI_NORMAL_OBJECT	    ((jint)0)
+#define JVMPI_CLASS		    ((jint)2)
+#define JVMPI_BOOLEAN	            ((jint)4)
+#define JVMPI_CHAR                  ((jint)5)
+#define JVMPI_FLOAT                 ((jint)6)
+#define JVMPI_DOUBLE                ((jint)7)
+#define JVMPI_BYTE                  ((jint)8)
+#define JVMPI_SHORT                 ((jint)9)
+#define JVMPI_INT                   ((jint)10)
+#define JVMPI_LONG                  ((jint)11)    
+
+/*
+ * Monitor dump constants.
+ */
+
+#define JVMPI_MONITOR_JAVA          0x01
+#define JVMPI_MONITOR_RAW           0x02
+
+/*
+ * Heap dump constants.
+ */
+#define JVMPI_GC_ROOT_UNKNOWN       0xff
+#define JVMPI_GC_ROOT_JNI_GLOBAL    0x01
+#define JVMPI_GC_ROOT_JNI_LOCAL     0x02
+#define JVMPI_GC_ROOT_JAVA_FRAME    0x03
+#define JVMPI_GC_ROOT_NATIVE_STACK  0x04
+#define JVMPI_GC_ROOT_STICKY_CLASS  0x05
+#define JVMPI_GC_ROOT_THREAD_BLOCK  0x06
+#define JVMPI_GC_ROOT_MONITOR_USED  0x07
+#define JVMPI_GC_ROOT_THREAD_OBJ    0x08
+
+#define JVMPI_GC_CLASS_DUMP         0x20
+#define JVMPI_GC_INSTANCE_DUMP      0x21 
+#define JVMPI_GC_OBJ_ARRAY_DUMP     0x22
+#define JVMPI_GC_PRIM_ARRAY_DUMP    0x23
+
+/*
+ * Dump levels
+ */
+#define JVMPI_DUMP_LEVEL_0    ((jint)0)
+#define JVMPI_DUMP_LEVEL_1    ((jint)1)
+#define JVMPI_DUMP_LEVEL_2    ((jint)2)
+
+/* Types used in dumps -
+ *
+ * u1: 1 byte
+ * u2: 2 bytes
+ * u4: 4 bytes
+ * u8: 8 bytes
+ *
+ * ty: u1 where:
+ *     JVMPI_CLASS:   object
+ *     JVMPI_BOOLEAN: boolean
+ *     JVMPI_CHAR:    char
+ *     JVMPI_FLOAT:   float
+ *     JVMPI_DOUBLE:  double
+ *     JVMPI_BYTE:    byte
+ *     JVMPI_SHORT:   short
+ *     JVMPI_INT:     int
+ *     JVMPI_LONG:    long
+ *
+ * vl: values, exact type depends on the type of the value:
+ *     JVMPI_BOOLEAN & JVMPI_BYTE:   u1
+ *     JVMPI_SHORT & JVMPI_CHAR:     u2
+ *     JVMPI_INT & JVMPI_FLOAT:      u4
+ *     JVMPI_LONG & JVMPI_DOUBLE:    u8
+ *     JVMPI_CLASS:                  jobjectID
+ */
+
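Record parsers need the size of a `vl` value given its `ty` tag; a small helper sketch following the table above, with the assumption that jobjectID values are stored as native pointers:

    /* Illustrative sketch: byte size of a 'vl' value for a given 'ty' tag. */
    #include <stddef.h>
    #include "jvmpi.h"

    static size_t dump_value_size(unsigned char ty) {
      switch (ty) {
        case JVMPI_BOOLEAN: case JVMPI_BYTE:   return 1;                  /* u1 */
        case JVMPI_SHORT:   case JVMPI_CHAR:   return 2;                  /* u2 */
        case JVMPI_INT:     case JVMPI_FLOAT:  return 4;                  /* u4 */
        case JVMPI_LONG:    case JVMPI_DOUBLE: return 8;                  /* u8 */
        case JVMPI_CLASS:                      return sizeof(jobjectID);  /* object id */
        default:                               return 0;                  /* unknown tag */
      }
    }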
+/* Format of the monitor dump buffer:
+ *
+ *               u1                          monitor type
+ *
+ *               JVMPI_MONITOR_JAVA          Java monitor
+ *
+ *                          jobjectID        object
+ *                          JNIEnv *         owner thread
+ *                          u4               entry count
+ *                          u4               # of threads waiting to enter
+ *                          [JNIEnv *]*      threads waiting to enter
+ *                          u4               # of threads waiting to be notified
+ *                          [JNIEnv *]*      threads waiting to be notified
+ *
+ *               JVMPI_MONITOR_RAW           raw monitor
+ *
+ *                          char *           name
+ *                          JVMPI_RawMonitor raw monitor
+ *                          JNIEnv *         owner thread
+ *                          u4               entry count
+ *                          u4               # of threads waiting to enter
+ *                          [JNIEnv *]*      threads waiting to enter
+ *                          u4               # of threads waiting to be notified
+ *                          [JNIEnv *]*      threads waiting to be notified
+ */
+
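A hedged skeleton for stepping over one record of the monitor dump buffer described above; pointer-sized fields and host byte order for the u4 counts are assumptions, not guarantees of this header:

    /* Illustrative sketch: advance past one monitor dump record. */
    #include <string.h>
    #include "jvmpi.h"

    static char *skip_waiter_list(char *p) {
      jint n;
      memcpy(&n, p, 4);                       /* u4: number of waiting threads */
      return p + 4 + n * sizeof(JNIEnv *);    /* followed by that many JNIEnv* */
    }

    static char *next_monitor_record(char *p) {
      unsigned char type = (unsigned char) *p++;
      if (type == JVMPI_MONITOR_JAVA) {
        p += sizeof(jobjectID);               /* object */
      } else {                                /* JVMPI_MONITOR_RAW */
        p += sizeof(char *);                  /* name */
        p += sizeof(JVMPI_RawMonitor);        /* raw monitor */
      }
      p += sizeof(JNIEnv *) + 4;              /* owner thread, u4 entry count */
      p = skip_waiter_list(p);                /* threads waiting to enter */
      p = skip_waiter_list(p);                /* threads waiting to be notified */
      return p;
    }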
+/* Format of the heap dump buffer depends on the dump level 
+ * specified in the JVMPI_HeapDumpArg passed to RequestEvent as arg. 
+ * The default is JVMPI_DUMP_LEVEL_2.
+ *
+ * JVMPI_DUMP_LEVEL_0:
+ * 
+ *               u1                          object type (JVMPI_CLASS ...)
+ *               jobjectID                   object
+ *
+ * JVMPI_DUMP_LEVEL_1 and JVMPI_DUMP_LEVEL_2 use the following format:  
+ * In the case of JVMPI_DUMP_LEVEL_1, the values of primitive fields in object
+ * instance dumps, the values of primitive statics in class dumps, and the
+ * values of primitive arrays are excluded.  JVMPI_DUMP_LEVEL_2 includes the
+ * primitive values.
+ *
+ *               u1                          record type
+ *
+ *               JVMPI_GC_ROOT_UNKNOWN       unknown root
+ *
+ *                          jobjectID        object
+ *
+ *               JVMPI_GC_ROOT_JNI_GLOBAL    JNI global ref root
+ *
+ *                          jobjectID        object
+ *                          jobject          JNI global reference
+ *
+ *               JVMPI_GC_ROOT_JNI_LOCAL     JNI local ref
+ *
+ *                          jobjectID        object
+ *                          JNIEnv *         thread
+ *                          u4               frame # in stack trace (-1 for empty)
+ *
+ *               JVMPI_GC_ROOT_JAVA_FRAME    Java stack frame
+ *
+ *                          jobjectID        object
+ *                          JNIEnv *         thread
+ *                          u4               frame # in stack trace (-1 for empty)
+ *
+ *               JVMPI_GC_ROOT_NATIVE_STACK  Native stack
+ *
+ *                          jobjectID        object
+ *                          JNIEnv *         thread
+ *
+ *               JVMPI_GC_ROOT_STICKY_CLASS  System class
+ *
+ *                          jobjectID        class object
+ *
+ *               JVMPI_GC_ROOT_THREAD_BLOCK  Reference from thread block
+ *
+ *                          jobjectID        thread object
+ *                          JNIEnv *         thread
+ *
+ *               JVMPI_GC_ROOT_MONITOR_USED  Busy monitor
+ *
+ *                          jobjectID        object
+ *
+ *               JVMPI_GC_CLASS_DUMP         dump of a class object
+ *
+ *                          jobjectID        class
+ *                          jobjectID        super
+ *                          jobjectID        class loader
+ *                          jobjectID        signers
+ *                          jobjectID        protection domain
+ *                          jobjectID        class name
+ *                          void *           reserved
+ *
+ *                          u4               instance size (in bytes)
+ *
+ *                          [jobjectID]*     interfaces
+ *
+ *                          u2               size of constant pool
+ *                          [u2,             constant pool index,
+ *                           ty,             type, 
+ *                           vl]*            value
+ *
+ *                          [vl]*            static field values
+ *
+ *               JVMPI_GC_INSTANCE_DUMP      dump of a normal object
+ *
+ *                          jobjectID        object
+ *                          jobjectID        class
+ *                          u4               number of bytes that follow
+ *                          [vl]*            instance field values (class, followed
+ *                                           by super, super's super ...)
+ *
+ *               JVMPI_GC_OBJ_ARRAY_DUMP     dump of an object array
+ *
+ *                          jobjectID        array object
+ *                          u4               number of elements
+ *                          jobjectID        element class
+ *                          [jobjectID]*     elements
+ *
+ *               JVMPI_GC_PRIM_ARRAY_DUMP    dump of a primitive array
+ *
+ *                          jobjectID        array object
+ *                          u4               number of elements
+ *                          ty               element type
+ *                          [vl]*            elements
+ *
+ */
+
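A hedged skeleton for dispatching on the heap dump record types above; only the fixed-size root records are advanced here, and pointer widths are assumptions:

    /* Illustrative sketch: step over the fixed-size root records. The class,
     * instance and array dump records have variable-length bodies and need
     * the fuller layouts documented above. */
    #include <stddef.h>
    #include "jvmpi.h"

    static char *next_root_record(char *p) {
      unsigned char tag = (unsigned char) *p++;
      switch (tag) {
        case JVMPI_GC_ROOT_UNKNOWN:
        case JVMPI_GC_ROOT_STICKY_CLASS:
        case JVMPI_GC_ROOT_MONITOR_USED:
          return p + sizeof(jobjectID);                         /* object */
        case JVMPI_GC_ROOT_JNI_GLOBAL:
          return p + sizeof(jobjectID) + sizeof(jobject);       /* object, global ref */
        case JVMPI_GC_ROOT_JNI_LOCAL:
        case JVMPI_GC_ROOT_JAVA_FRAME:
          return p + sizeof(jobjectID) + sizeof(JNIEnv *) + 4;  /* object, thread, u4 frame # */
        case JVMPI_GC_ROOT_NATIVE_STACK:
        case JVMPI_GC_ROOT_THREAD_BLOCK:
          return p + sizeof(jobjectID) + sizeof(JNIEnv *);      /* object, thread */
        default:
          return NULL;  /* class/instance/array dump record or unknown tag */
      }
    }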
+/* Format of the dump received in JVMPI_EVENT_OBJECT_DUMP:
+ * All the records have JVMPI_DUMP_LEVEL_2 information.
+ *
+ *               u1                          record type
+ *
+ *                     followed by a:
+ *
+ *                          JVMPI_GC_CLASS_DUMP,
+ *                          JVMPI_GC_INSTANCE_DUMP,
+ *                          JVMPI_GC_OBJ_ARRAY_DUMP, or
+ *                          JVMPI_GC_PRIM_ARRAY_DUMP record.
+ */
+
+#endif /* !_JAVASOFT_JVMPI_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/prims/jvmpi.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,170 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)jvmpi.hpp	1.45 05/11/18 15:23:06 JVM"
+#endif
+// 
+// Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+// SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+// 
+
+#define JVMPI_EVENT_DISABLED        0
+#define JVMPI_EVENT_NOT_SUPPORTED  ((unsigned int)-1)
+#define JVMPI_EVENT_ENABLED        ((unsigned int)-2)
+
+#define JVMPI_PROFILING_OFF        0x00000000
+#define JVMPI_PROFILING_ON         0x80000000
+
+#define JVMPI_INVALID_CLASS ((oop)(-1))
+
+typedef struct {
+    methodOop method;               /* method being compiled */
+    void *code_addr;                /* virtual address of the method */
+    jint code_size;                 /* size of compiled method in memory */
+    jint lineno_table_len;          /* number of lineno table entries */
+    JVMPI_Lineno *lineno_table;     /* pointer to beginning of line table */
+} compiled_method_t;
+
+
+class jvmpi : public AllStatic {
+ private:
+  // JVMPI interface data structure
+  static JVMPI_Interface jvmpi_interface;
+  static bool slow_allocation;
+
+  static void reset_jvmpi_allocation();
+
+  // To track if notification for a particular event type is enabled/disabled.
+  static unsigned int _event_flags_array[JVMPI_MAX_EVENT_TYPE_VAL+1];
+  static unsigned int _event_flags;
+
+  // initialization
+  static void initialize(int version);
+
+  // enable/disable event notification
+  static inline void enable_event(jint event_type);
+  static inline void disable_event(jint event_type);
+
+  static void post_event(JVMPI_Event* event);
+
+  static void post_event_common(JVMPI_Event* event);
+
+  static void post_event_vm_mode(JVMPI_Event* event, JavaThread* calling_thread);
+
+  // C heap memory allocation/free
+  static inline void* calloc(size_t size);
+  static inline void free(void* ptr);
+
+  // functions exported through the JVMPI
+  static void get_call_trace(JVMPI_CallTrace *trace, jint depth);
+  static jlong get_current_thread_cpu_time();
+  static JVMPI_RawMonitor raw_monitor_create(char *lock_name);
+  static void raw_monitor_enter(JVMPI_RawMonitor lock_id);
+  static void raw_monitor_exit(JVMPI_RawMonitor lock_id);
+  static void raw_monitor_destroy(JVMPI_RawMonitor lock_id);
+  static void raw_monitor_wait(JVMPI_RawMonitor lock_id, jlong ms);
+  static void raw_monitor_notify_all(JVMPI_RawMonitor lock_id);
+  static void suspend_thread(JNIEnv *env);
+  static void suspend_thread_list(jint reqCnt, JNIEnv **reqList, jint *results);
+  static void resume_thread(JNIEnv *env);
+  static void resume_thread_list(jint reqCnt, JNIEnv **reqList, jint *results);
+  static jint get_thread_status(JNIEnv *env);
+  static jboolean thread_has_run(JNIEnv *env);
+  static void run_gc();
+  static void profiler_exit(jint exit_code);
+  static jint create_system_thread(char *name, jint priority, jvmpi_void_function_of_void f);
+  static jint enable_event(jint event_type, void *arg);
+  static jint disable_event(jint event_type, void *arg);
+  static jint request_event(jint event_type, void *arg);
+  static void set_thread_local_storage(JNIEnv *env, void *ptr);
+  static void* get_thread_local_storage(JNIEnv *env);
+  static void disable_gc();
+  static void enable_gc();
+  static jobjectID get_thread_object(JNIEnv *env);
+  static jobjectID get_method_class(jmethodID mid);
+  static jobject   jobjectID_2_jobject(jobjectID);
+  static jobjectID jobject_2_jobjectID(jobject);
+  
+ public:
+  // called from JNI to get the JVMPI interface function table
+  static JVMPI_Interface* GetInterface_1(int version);
+
+  // called before VM shutdown
+  static void disengage();
+
+  // test if jvmpi is enabled
+  static inline bool enabled();
+  
+  // per event tests
+  static inline bool is_event_enabled(jint event_type);
+  static inline bool is_event_supported(jint event_type);
+  
+  // support for (interpreter) code generation
+  static inline unsigned int* event_flags_array_at_addr(jint event_type);
+
+  // functions called by other parts of the VM to notify events
+  static void post_vm_initialization_events();
+  static void post_vm_initialized_event();
+  static void post_vm_death_event      ();
+
+  static void post_instruction_start_event(const frame& f);
+
+  static void post_thread_start_event  (JavaThread* thread, jint flag);
+  static void post_thread_start_event  (JavaThread* thread);
+  static void post_thread_end_event    (JavaThread* thread);
+
+  static void fillin_array_class_load_event  (oop k, JVMPI_Event *eventp);
+  static void fillin_class_load_event  (oop k, JVMPI_Event *eventp, bool fillin_jni_ids);
+  static void post_class_load_event    (oop k, jint flag);
+  static void post_class_load_event    (oop k);
+  // ptr to a function that takes an unsigned int param and returns a void *
+  typedef void * (*jvmpi_alloc_func_t)(unsigned int bytecnt);
+  static void post_class_load_hook_event(unsigned char **ptrP,
+    unsigned char **end_ptrP, jvmpi_alloc_func_t malloc_f);
+  static void *jvmpi_alloc(unsigned int bytecnt);
+  static void post_class_unload_events();
+  static void save_class_unload_event_info(oop k);
+
+  static void post_dump_event();
+
+  static void post_new_globalref_event(jobject ref, oop obj, bool post_jvmpi_event);
+  static void post_delete_globalref_event(jobject ref, bool post_jvmpi_event);
+  static void post_new_weakref_event(jobject ref, oop obj);
+  static void post_delete_weakref_event(jobject ref);
+
+  static void post_arena_new_event(int arena_id, const char* arena_name);
+  static void post_arena_delete_event(int arena_id);
+  static void post_object_alloc_event(oop obj, size_t bytesize, jint arena_id, jint flag);
+  static void post_object_free_event(oop obj);
+  static void post_object_move_event(oop oldobj, int old_arena, oop newobj, int new_arena);
+
+  static void post_method_entry2_event(methodOop m, oop receiver);
+  static void post_method_entry_event(methodOop m);
+  static void post_method_exit_event(methodOop m);
+
+  static void post_compiled_method_load_event(compiled_method_t *compiled_method_info);
+  static void post_compiled_method_unload_event(methodOop method);
+
+  static void post_monitor_contended_enter_event(void *mid);
+  static void post_monitor_contended_entered_event(void *mid);
+  static void post_monitor_contended_exit_event(void *mid);
+
+  static void post_monitor_wait_event(oop obj, jlong millis);
+  static void post_monitor_waited_event(oop obj, jlong millis);
+
+  static void post_raw_monitor_contended_enter_event(RawMonitor* o);
+  static void post_raw_monitor_contended_entered_event(RawMonitor* o);
+  static void post_raw_monitor_contended_exit_event(RawMonitor* o);
+
+  static void post_gc_start_event();
+  static void post_gc_finish_event(jlong used_obj_space, jlong total_obj_space);
+
+  static void post_trace_instr_event(unsigned char *pc, unsigned char opcode);
+  static void post_trace_if_event(unsigned char *pc, int is_true);
+  static void post_trace_tableswitch_event(unsigned char *pc, int key, int low, int hi);
+  static void post_trace_lookupswitch_event(unsigned char *pc,
+                                            int chosen_pair_index,
+                                            int pairs_total);
+
+  static void post_object_dump_event(oop obj, int flag);
+  static void post_heap_dump_event_in_safepoint(int level, int flag);
+  static void post_monitor_dump_event_in_safepoint(int flag);
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/prims/jvmpi.inline.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,60 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)jvmpi.inline.hpp	1.15 05/11/18 15:23:06 JVM"
+#endif
+// 
+// Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+// SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+// 
+
+inline bool jvmpi::is_event_enabled(jint event_type) {
+  return (event_type >= 31
+	  ? (enabled() && (_event_flags_array[event_type] == JVMPI_EVENT_ENABLED)) 
+	  : (_event_flags & (1 << event_type)) != 0);
+}
+
+
+inline void* jvmpi::calloc(size_t size) {
+  void* p = os::malloc(size);
+  if (p == NULL) {
+    vm_exit_out_of_memory(size, "jvmpi::calloc");
+  }
+  memset(p, 0, size);
+  return p;
+}
+
+
+inline void jvmpi::free(void* ptr) {
+  os::free(ptr);
+}
+
+
+inline void jvmpi::enable_event(jint event_type) {
+  if (event_type < 31) {
+    _event_flags |= 1 << event_type;
+  }
+  _event_flags_array[event_type] = JVMPI_EVENT_ENABLED;
+}
+
+
+inline void jvmpi::disable_event(jint event_type) {
+  if (event_type < 31) {
+    _event_flags &= ~(1 << event_type);
+  }
+  _event_flags_array[event_type] = JVMPI_EVENT_DISABLED;
+}
+
+
+inline bool jvmpi::enabled() {
+  return !!(_event_flags & JVMPI_PROFILING_ON);
+}
+
+
+inline bool jvmpi::is_event_supported(jint event_type) {
+  return ((event_type <= JVMPI_MAX_EVENT_TYPE_VAL) &&
+	  (_event_flags_array[event_type] != JVMPI_EVENT_NOT_SUPPORTED));
+}
+
+
+inline unsigned int* jvmpi::event_flags_array_at_addr(jint event_type) {
+  return &_event_flags_array[event_type];
+}
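The inline helpers above split event bookkeeping between a 32-bit mask (event types below 31, with JVMPI_PROFILING_ON in the top bit) and the per-event array for higher-numbered events. A small restatement as a standalone sketch, for illustration only:

    // Illustrative sketch: the enabled state an event ends up with, written
    // out as a free function (mirrors enabled() and is_event_enabled() above).
    static bool sketch_is_enabled(unsigned int flags,
                                  const unsigned int *flags_array,
                                  int event_type) {
      if (event_type < 31) {
        return (flags & (1u << event_type)) != 0;       // fast bit test
      }
      return (flags & JVMPI_PROFILING_ON) != 0 &&       // profiling switched on
             flags_array[event_type] == JVMPI_EVENT_ENABLED;
    }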
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/prims/rawMonitor.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -0,0 +1,37 @@
+#ifdef USE_PRAGMA_IDENT_HDR
+#pragma ident "@(#)rawMonitor.hpp	1.8 05/11/18 15:23:16 JVM"
+#endif
+// 
+// Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+// SUN PROPRIETARY/CONFIDENTIAL.  Use is subject to license terms.
+// 
+
+//
+//
+// class RawMonitor
+// Used by all JVMPI raw monitor entry points:
+//   (RawMonitorCreate, RawMonitorEnter, etc.)
+//
+// Wrapper for the ObjectMonitor class that saves the monitor's name
+// and links the thread's owned raw monitors.
+
+class RawMonitor: public ObjectMonitor {
+private:
+  int           _magic;
+  char *        _name;
+  // maintaining list of locked raw monitors
+  RawMonitor* _rmnext;    
+  RawMonitor* _rmprev;
+
+public:
+  RawMonitor(const char *name, const int magic);
+  ~RawMonitor();                             
+  int       magic()                         { return _magic;  }
+  void      print(outputStream& out)        { out.print(_name); }
+  RawMonitor* next_raw() const              { return _rmnext; }
+  const char *name() const                  { return _name; }
+  void add_to_locked_list();
+  void remove_from_locked_list();
+  int       raw_destroy();
+};
+
--- a/hotspot/src/share/vm/runtime/mutex.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/mutex.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +71,12 @@
 #ifdef SPARC        // big
  #define _LSBINDEX (sizeof(intptr_t)-1)
 #else
- #error "unknown architecture"
+#include <endian.h>
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ #define _LSBINDEX 0
+#else
+ #define _LSBINDEX (sizeof(intptr_t)-1)
+#endif
 #endif
 #endif
 #endif
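The replacement branch derives _LSBINDEX from <endian.h>: it is the byte offset of an intptr_t's least significant byte, 0 on little-endian targets (which covers the little-endian MIPS boards this port is aimed at) and sizeof(intptr_t)-1 on big-endian ones. A tiny standalone illustration, not tied to HotSpot:

    // Illustrative sketch: a byte-wide access at _LSBINDEX touches the
    // low-order bits of the word on either endianness.
    #include <endian.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <assert.h>

    static void lsbindex_check() {
      intptr_t word = 1;                            // only the lowest bit set
      const unsigned char *bytes = (const unsigned char *) &word;
    #if __BYTE_ORDER == __LITTLE_ENDIAN
      const size_t lsbindex = 0;
    #else
      const size_t lsbindex = sizeof(intptr_t) - 1;
    #endif
      assert(bytes[lsbindex] == 1);                 // the indexed byte holds the LSB
    }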
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -109,6 +109,15 @@
     tty->print_cr ("Total IC misses: %7d", tot_misses);
   }
 }
+
+void SharedRuntime::print_int(int i) {
+  tty->print("T0 = %x\n", i);
+}
+
+void SharedRuntime::print_str(char *str) {
+  tty->print("%s", str);
+}
+
 #endif // PRODUCT
 
 #ifndef SERIALGC
@@ -550,6 +559,11 @@
   address target_pc = NULL;
 
   if (Interpreter::contains(pc)) {
+#if 0
+#ifdef LOONGSONDEBUG
+	printf("handle exception in interpreter\n");
+#endif 
+#endif
 #ifdef CC_INTERP
     // C++ interpreter doesn't throw implicit exceptions
     ShouldNotReachHere();
@@ -562,6 +576,12 @@
     }
 #endif // !CC_INTERP
   } else {
+#if 0
+#ifdef LOONGSONDEBUG
+	printf("handle exception in compiled\n");
+#endif 
+#endif
+
     switch (exception_kind) {
       case STACK_OVERFLOW: {
         // Stack overflow only occurs upon frame setup; the callee is
@@ -581,7 +601,13 @@
           // exception and begin dispatching it in the caller. Since
           // the caller was at a call site, it's safe to destroy all
           // caller-saved registers, as these entry points do.
-          VtableStub* vt_stub = VtableStubs::stub_containing(pc);
+#if 0
+#ifdef LOONGSONDEBUG
+		printf("vtableStubs contains pc\n");
+#endif 
+#endif
+
+          VtableStub* vt_stub = VtableStubs::stub_containing(pc);
 
           // If vt_stub is NULL, then return NULL to signal handler to report the SEGV error.
           if (vt_stub == NULL) return NULL;
@@ -593,7 +619,12 @@
             return StubRoutines::throw_NullPointerException_at_call_entry();
           }
         } else {
-          CodeBlob* cb = CodeCache::find_blob(pc);
+#if 0
+#ifdef LOONGSONDEBUG
+		printf("vtableStubs not contains pc\n");
+#endif 
+#endif
+          CodeBlob* cb = CodeCache::find_blob(pc);
 
           // If code blob is NULL, then return NULL to signal handler to report the SEGV error.
           if (cb == NULL) return NULL;
@@ -807,7 +838,6 @@
 
   // last java frame on stack (which includes native call frames)
   vframeStream vfst(thread, true);  // Do not skip and javaCalls
-
   return find_callee_info_helper(thread, vfst, bc, callinfo, CHECK_(Handle()));
 }
 
@@ -847,6 +877,7 @@
       THROW_(vmSymbols::java_lang_NoSuchMethodException(), nullHandle);
     }
     // Retrieve from a compiled argument list
+    oop recv = callerFrame.retrieve_receiver(&reg_map2);
     receiver = Handle(THREAD, callerFrame.retrieve_receiver(&reg_map2));
 
     if (receiver.is_null()) {
@@ -1294,7 +1325,6 @@
   // Do nothing if the frame isn't a live compiled frame.
   // nmethod could be deoptimized by the time we get here
   // so no update to the caller is needed.
-
   if (caller.is_compiled_frame() && !caller.is_deoptimized_frame()) {
 
     address pc = caller.pc();
--- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -285,6 +286,9 @@
 
   // Ditto except for calling C
   static int c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed);
+#ifdef MIPS32
+  static int c_calling_convention_jni(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed);
+#endif
 
   // Generate I2C and C2I adapters. These adapters are simple argument marshalling
   // blobs. Unlike adapters in the tiger and earlier releases the code in these
@@ -471,7 +475,8 @@
   static void print_call_statistics(int comp_total);
   static void print_statistics();
   static void print_ic_miss_histogram();
-
+  static void print_int(int i);
+  static void print_str(char *str);
 #endif // PRODUCT
 };
 
--- a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -568,6 +568,14 @@
  *         then                   3    2
  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
  */
+#ifdef MIPS32 // workaround for the conflict with register names S0-S6
+#undef S1
+#undef S2
+#undef S3
+#undef S4
+#undef S5
+#undef S6
+#endif
 
 static const double
 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
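The #undef block works around the S1..S6 names being visible as preprocessor macros at this point, presumably via the MIPS register headers pulled in ahead of this file, which would otherwise mangle the coefficient names below. A miniature of the pattern; REG_S1 is a hypothetical stand-in for whatever the register headers define:

    // Illustrative sketch of the conflict being worked around.
    #define S1 REG_S1          // what an included register header might do
    #undef  S1                 // the workaround applied by this hunk
    static const double S1 = -1.66666666666666324348e-01;  // now a plain identifier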
--- a/hotspot/src/share/vm/runtime/signature.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/signature.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,131 +40,131 @@
 // iterator implemented in opto/type.cpp, TypeTuple::make().
 
 class SignatureIterator: public ResourceObj {
- protected:
-  symbolHandle _signature;             // the signature to iterate over
-  int          _index;                 // the current character index (only valid during iteration)
-  int          _parameter_index;       // the current parameter index (0 outside iteration phase)
-  BasicType    _return_type;
+	protected:
+		symbolHandle _signature;             // the signature to iterate over
+		int          _index;                 // the current character index (only valid during iteration)
+		int          _parameter_index;       // the current parameter index (0 outside iteration phase)
+		BasicType    _return_type;
 
-  void expect(char c);
-  void skip_optional_size();
-  int  parse_type();                   // returns the parameter size in words (0 for void)
-  void check_signature_end();
+		void expect(char c);
+		void skip_optional_size();
+		int  parse_type();                   // returns the parameter size in words (0 for void)
+		void check_signature_end();
 
- public:
-  // Definitions used in generating and iterating the
-  // bit field form of the signature generated by the
-  // Fingerprinter.
-  enum {
-    static_feature_size    = 1,
-    result_feature_size    = 4,
-    result_feature_mask    = 0xF,
-    parameter_feature_size = 4,
-    parameter_feature_mask = 0xF,
+	public:
+		// Definitions used in generating and iterating the
+		// bit field form of the signature generated by the
+		// Fingerprinter.
+		enum {
+			static_feature_size    = 1,
+			result_feature_size    = 4,
+			result_feature_mask    = 0xF,
+			parameter_feature_size = 4,
+			parameter_feature_mask = 0xF,
 
-      bool_parm            = 1,
-      byte_parm            = 2,
-      char_parm            = 3,
-      short_parm           = 4,
-      int_parm             = 5,
-      long_parm            = 6,
-      float_parm           = 7,
-      double_parm          = 8,
-      obj_parm             = 9,
-      done_parm            = 10,  // marker for end of parameters
+			bool_parm            = 1,
+			byte_parm            = 2,
+			char_parm            = 3,
+			short_parm           = 4,
+			int_parm             = 5,
+			long_parm            = 6,
+			float_parm           = 7,
+			double_parm          = 8,
+			obj_parm             = 9,
+			done_parm            = 10,  // marker for end of parameters
 
-    // max parameters is wordsize minus
-    //    The sign bit, termination field, the result and static bit fields
-    max_size_of_parameters = (BitsPerLong-1 -
-                              result_feature_size - parameter_feature_size -
-                              static_feature_size) / parameter_feature_size
-  };
+			// max parameters is wordsize minus
+			//    The sign bit, termination field, the result and static bit fields
+			max_size_of_parameters = (BitsPerLong-1 -
+					result_feature_size - parameter_feature_size -
+					static_feature_size) / parameter_feature_size
+		};
 
-  // Constructors
-  SignatureIterator(symbolOop signature);
-  SignatureIterator(Thread *thread, symbolOop signature);
-  SignatureIterator(symbolHandle signature);
+		// Constructors
+		SignatureIterator(symbolOop signature);
+		SignatureIterator(Thread *thread, symbolOop signature);
+		SignatureIterator(symbolHandle signature);
 
-  // Iteration
-  void dispatch_field();               // dispatches once for field signatures
-  void iterate_parameters();           // iterates over parameters only
-  void iterate_parameters( uint64_t fingerprint );
-  void iterate_returntype();           // iterates over returntype only
-  void iterate();                      // iterates over whole signature
-  // Returns the word index of the current parameter;
-  int  parameter_index() const         { return _parameter_index; }
-  bool is_return_type() const          { return parameter_index() < 0; }
-  BasicType get_ret_type() const       { return _return_type; }
+		// Iteration
+		void dispatch_field();               // dispatches once for field signatures
+		void iterate_parameters();           // iterates over parameters only
+		void iterate_parameters( uint64_t fingerprint );
+		void iterate_returntype();           // iterates over returntype only
+		void iterate();                      // iterates over whole signature
+		// Returns the word index of the current parameter;
+		int  parameter_index() const         { return _parameter_index; }
+		bool is_return_type() const          { return parameter_index() < 0; }
+		BasicType get_ret_type() const       { return _return_type; }
 
-  // Basic types
-  virtual void do_bool  ()             = 0;
-  virtual void do_char  ()             = 0;
-  virtual void do_float ()             = 0;
-  virtual void do_double()             = 0;
-  virtual void do_byte  ()             = 0;
-  virtual void do_short ()             = 0;
-  virtual void do_int   ()             = 0;
-  virtual void do_long  ()             = 0;
-  virtual void do_void  ()             = 0;
+		// Basic types
+		virtual void do_bool  ()             = 0;
+		virtual void do_char  ()             = 0;
+		virtual void do_float ()             = 0;
+		virtual void do_double()             = 0;
+		virtual void do_byte  ()             = 0;
+		virtual void do_short ()             = 0;
+		virtual void do_int   ()             = 0;
+		virtual void do_long  ()             = 0;
+		virtual void do_void  ()             = 0;
 
-  // Object types (begin indexes the first character of the entry, end indexes the first character after the entry)
-  virtual void do_object(int begin, int end) = 0;
-  virtual void do_array (int begin, int end) = 0;
+		// Object types (begin indexes the first character of the entry, end indexes the first character after the entry)
+		virtual void do_object(int begin, int end) = 0;
+		virtual void do_array (int begin, int end) = 0;
 };
 
 
 // Specialized SignatureIterators: Used to compute signature specific values.
 
 class SignatureTypeNames : public SignatureIterator {
- protected:
-  virtual void type_name(const char* name)   = 0;
+	protected:
+		virtual void type_name(const char* name)   = 0;
 
-  void do_bool()                       { type_name("jboolean"); }
-  void do_char()                       { type_name("jchar"   ); }
-  void do_float()                      { type_name("jfloat"  ); }
-  void do_double()                     { type_name("jdouble" ); }
-  void do_byte()                       { type_name("jbyte"   ); }
-  void do_short()                      { type_name("jshort"  ); }
-  void do_int()                        { type_name("jint"    ); }
-  void do_long()                       { type_name("jlong"   ); }
-  void do_void()                       { type_name("void"    ); }
-  void do_object(int begin, int end)   { type_name("jobject" ); }
-  void do_array (int begin, int end)   { type_name("jobject" ); }
+		void do_bool()                       { type_name("jboolean"); }
+		void do_char()                       { type_name("jchar"   ); }
+		void do_float()                      { type_name("jfloat"  ); }
+		void do_double()                     { type_name("jdouble" ); }
+		void do_byte()                       { type_name("jbyte"   ); }
+		void do_short()                      { type_name("jshort"  ); }
+		void do_int()                        { type_name("jint"    ); }
+		void do_long()                       { type_name("jlong"   ); }
+		void do_void()                       { type_name("void"    ); }
+		void do_object(int begin, int end)   { type_name("jobject" ); }
+		void do_array (int begin, int end)   { type_name("jobject" ); }
 
- public:
-  SignatureTypeNames(symbolHandle signature) : SignatureIterator(signature) {}
+	public:
+		SignatureTypeNames(symbolHandle signature) : SignatureIterator(signature) {}
 };
 
 
 class SignatureInfo: public SignatureIterator {
- protected:
-  bool      _has_iterated;             // need this because iterate cannot be called in constructor (set is virtual!)
-  bool      _has_iterated_return;
-  int       _size;
+	protected:
+		bool      _has_iterated;             // need this because iterate cannot be called in constructor (set is virtual!)
+		bool      _has_iterated_return;
+		int       _size;
 
-  void lazy_iterate_parameters()       { if (!_has_iterated) { iterate_parameters(); _has_iterated = true; } }
-  void lazy_iterate_return()           { if (!_has_iterated_return) { iterate_returntype(); _has_iterated_return = true; } }
+		void lazy_iterate_parameters()       { if (!_has_iterated) { iterate_parameters(); _has_iterated = true; } }
+		void lazy_iterate_return()           { if (!_has_iterated_return) { iterate_returntype(); _has_iterated_return = true; } }
 
-  virtual void set(int size, BasicType type) = 0;
+		virtual void set(int size, BasicType type) = 0;
 
-  void do_bool  ()                     { set(T_BOOLEAN_size, T_BOOLEAN); }
-  void do_char  ()                     { set(T_CHAR_size   , T_CHAR   ); }
-  void do_float ()                     { set(T_FLOAT_size  , T_FLOAT  ); }
-  void do_double()                     { set(T_DOUBLE_size , T_DOUBLE ); }
-  void do_byte  ()                     { set(T_BYTE_size   , T_BYTE   ); }
-  void do_short ()                     { set(T_SHORT_size  , T_SHORT  ); }
-  void do_int   ()                     { set(T_INT_size    , T_INT    ); }
-  void do_long  ()                     { set(T_LONG_size   , T_LONG   ); }
-  void do_void  ()                     { set(T_VOID_size   , T_VOID   ); }
-  void do_object(int begin, int end)   { set(T_OBJECT_size , T_OBJECT ); }
-  void do_array (int begin, int end)   { set(T_ARRAY_size  , T_ARRAY  ); }
+		void do_bool  ()                     { set(T_BOOLEAN_size, T_BOOLEAN); }
+		void do_char  ()                     { set(T_CHAR_size   , T_CHAR   ); }
+		void do_float ()                     { set(T_FLOAT_size  , T_FLOAT  ); }
+		void do_double()                     { set(T_DOUBLE_size , T_DOUBLE ); }
+		void do_byte  ()                     { set(T_BYTE_size   , T_BYTE   ); }
+		void do_short ()                     { set(T_SHORT_size  , T_SHORT  ); }
+		void do_int   ()                     { set(T_INT_size    , T_INT    ); }
+		void do_long  ()                     { set(T_LONG_size   , T_LONG   ); }
+		void do_void  ()                     { set(T_VOID_size   , T_VOID   ); }
+		void do_object(int begin, int end)   { set(T_OBJECT_size , T_OBJECT ); }
+		void do_array (int begin, int end)   { set(T_ARRAY_size  , T_ARRAY  ); }
 
- public:
-  SignatureInfo(symbolHandle signature) : SignatureIterator(signature) {
-    _has_iterated = _has_iterated_return = false;
-    _size         = 0;
-    _return_type  = T_ILLEGAL;
-  }
+	public:
+		SignatureInfo(symbolHandle signature) : SignatureIterator(signature) {
+			_has_iterated = _has_iterated_return = false;
+			_size         = 0;
+			_return_type  = T_ILLEGAL;
+		}
 
 };
 
@@ -171,246 +172,251 @@
 // Specialized SignatureIterator: Used to compute the argument size.
 
 class ArgumentSizeComputer: public SignatureInfo {
- private:
-  void set(int size, BasicType type)   { _size += size; }
- public:
-  ArgumentSizeComputer(symbolHandle signature) : SignatureInfo(signature) {}
+	private:
+		void set(int size, BasicType type)   { _size += size; }
+	public:
+		ArgumentSizeComputer(symbolHandle signature) : SignatureInfo(signature) {}
 
-  int       size()                     { lazy_iterate_parameters(); return _size; }
+		int       size()                     { lazy_iterate_parameters(); return _size; }
 };
 
 
 class ArgumentCount: public SignatureInfo {
- private:
-  void set(int size, BasicType type)   { _size ++; }
- public:
-  ArgumentCount(symbolHandle signature) : SignatureInfo(signature) {}
+	private:
+		void set(int size, BasicType type)   { _size ++; }
+	public:
+		ArgumentCount(symbolHandle signature) : SignatureInfo(signature) {}
 
-  int       size()                     { lazy_iterate_parameters(); return _size; }
+		int       size()                     { lazy_iterate_parameters(); return _size; }
 };
 
 
 // Specialized SignatureIterator: Used to compute the result type.
 
 class ResultTypeFinder: public SignatureInfo {
- private:
-  void set(int size, BasicType type)   { _return_type = type; }
- public:
-  BasicType type()                     { lazy_iterate_return(); return _return_type; }
+	private:
+		void set(int size, BasicType type)   { _return_type = type; }
+	public:
+		BasicType type()                     { lazy_iterate_return(); return _return_type; }
 
-  ResultTypeFinder(symbolHandle signature) : SignatureInfo(signature) {}
+		ResultTypeFinder(symbolHandle signature) : SignatureInfo(signature) {}
 };
 
 
 // Fingerprinter computes a unique ID for a given method. The ID
 // is a bitvector characterizing the methods signature (incl. the receiver).
 class Fingerprinter: public SignatureIterator {
- private:
-  uint64_t _fingerprint;
-  int _shift_count;
-  methodHandle mh;
+	private:
+		uint64_t _fingerprint;
+		int _shift_count;
+		methodHandle mh;
 
- public:
+	public:
 
-  void do_bool()    { _fingerprint |= (((uint64_t)bool_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_char()    { _fingerprint |= (((uint64_t)char_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_byte()    { _fingerprint |= (((uint64_t)byte_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_short()   { _fingerprint |= (((uint64_t)short_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_int()     { _fingerprint |= (((uint64_t)int_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_long()    { _fingerprint |= (((uint64_t)long_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_float()   { _fingerprint |= (((uint64_t)float_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_double()  { _fingerprint |= (((uint64_t)double_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_bool()    { _fingerprint |= (((uint64_t)bool_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_char()    { _fingerprint |= (((uint64_t)char_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_byte()    { _fingerprint |= (((uint64_t)byte_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_short()   { _fingerprint |= (((uint64_t)short_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_int()     { _fingerprint |= (((uint64_t)int_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_long()    { _fingerprint |= (((uint64_t)long_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_float()   { _fingerprint |= (((uint64_t)float_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_double()  { _fingerprint |= (((uint64_t)double_parm) << _shift_count); _shift_count += parameter_feature_size; }
 
-  void do_object(int begin, int end)  { _fingerprint |= (((uint64_t)obj_parm) << _shift_count); _shift_count += parameter_feature_size; }
-  void do_array (int begin, int end)  { _fingerprint |= (((uint64_t)obj_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_object(int begin, int end)  { _fingerprint |= (((uint64_t)obj_parm) << _shift_count); _shift_count += parameter_feature_size; }
+		void do_array (int begin, int end)  { _fingerprint |= (((uint64_t)obj_parm) << _shift_count); _shift_count += parameter_feature_size; }
 
-  void do_void()    { ShouldNotReachHere(); }
+		void do_void()    { ShouldNotReachHere(); }
 
-  Fingerprinter(methodHandle method) : SignatureIterator(method->signature()) {
-    mh = method;
-    _fingerprint = 0;
-  }
+		Fingerprinter(methodHandle method) : SignatureIterator(method->signature()) {
+			mh = method;
+			_fingerprint = 0;
+		}
 
-  Fingerprinter(Thread *thread, methodHandle method) : SignatureIterator(thread, method->signature()) {
-    mh = method;
-    _fingerprint = 0;
-  }
+		Fingerprinter(Thread *thread, methodHandle method) : SignatureIterator(thread, method->signature()) {
+			mh = method;
+			_fingerprint = 0;
+		}
 
-  uint64_t fingerprint() {
-    // See if we fingerprinted this method already
-    if (mh->constMethod()->fingerprint() != CONST64(0)) {
-      return mh->constMethod()->fingerprint();
-    }
+		uint64_t fingerprint() {
+			// See if we fingerprinted this method already
+			if (mh->constMethod()->fingerprint() != CONST64(0)) {
+				return mh->constMethod()->fingerprint();
+			}
 
-    if (mh->size_of_parameters() > max_size_of_parameters ) {
-      _fingerprint = UCONST64(-1);
-      mh->constMethod()->set_fingerprint(_fingerprint);
-      return _fingerprint;
-    }
+			if (mh->size_of_parameters() > max_size_of_parameters ) {
+				_fingerprint = UCONST64(-1);
+				mh->constMethod()->set_fingerprint(_fingerprint);
+				return _fingerprint;
+			}
 
-    assert( (int)mh->result_type() <= (int)result_feature_mask, "bad result type");
-    _fingerprint = mh->result_type();
-    _fingerprint <<= static_feature_size;
-    if (mh->is_static())  _fingerprint |= 1;
-    _shift_count = result_feature_size + static_feature_size;
-    iterate_parameters();
-    _fingerprint |= ((uint64_t)done_parm) << _shift_count;// mark end of sig
-    mh->constMethod()->set_fingerprint(_fingerprint);
-    return _fingerprint;
-  }
+			assert( (int)mh->result_type() <= (int)result_feature_mask, "bad result type");
+			_fingerprint = mh->result_type();
+			_fingerprint <<= static_feature_size;
+			if (mh->is_static())  _fingerprint |= 1;
+			_shift_count = result_feature_size + static_feature_size;
+			iterate_parameters();
+			_fingerprint |= ((uint64_t)done_parm) << _shift_count;// mark end of sig
+			mh->constMethod()->set_fingerprint(_fingerprint);
+			return _fingerprint;
+		}
 };
 
 
 // Specialized SignatureIterator: Used for native call purposes
 
 class NativeSignatureIterator: public SignatureIterator {
- private:
-  methodHandle _method;
-// We need seperate JNI and Java offset values because in 64 bit mode,
-// the argument offsets are not in sync with the Java stack.
-// For example a long takes up 1 "C" stack entry but 2 Java stack entries.
-  int          _offset;                // The java stack offset
-  int          _prepended;             // number of prepended JNI parameters (1 JNIEnv, plus 1 mirror if static)
-  int          _jni_offset;            // the current parameter offset, starting with 0
-
-  void do_bool  ()                     { pass_int();    _jni_offset++; _offset++;       }
-  void do_char  ()                     { pass_int();    _jni_offset++; _offset++;       }
-#ifdef _LP64
-  void do_float ()                     { pass_float();  _jni_offset++; _offset++;       }
-  void do_double()                     { pass_double(); _jni_offset++; _offset += 2;    }
-#else
-  void do_float ()                     { pass_int();    _jni_offset++; _offset++;       }
-  void do_double()                     { pass_double(); _jni_offset += 2; _offset += 2; }
+	private:
+		methodHandle _method;
+		// We need separate JNI and Java offset values because in 64 bit mode,
+		// the argument offsets are not in sync with the Java stack.
+		// For example a long takes up 1 "C" stack entry but 2 Java stack entries.
+#ifdef MIPS32
+	protected:
 #endif
-  void do_byte  ()                     { pass_int();    _jni_offset++; _offset++;       }
-  void do_short ()                     { pass_int();    _jni_offset++; _offset++;       }
-  void do_int   ()                     { pass_int();    _jni_offset++; _offset++;       }
+		int          _offset;                // The java stack offset
+		int          _prepended;             // number of prepended JNI parameters (1 JNIEnv, plus 1 mirror if static)
+		int          _jni_offset;            // the current parameter offset, starting with 0
+#ifdef MIPS32
+	private:
+#endif
+		void do_bool  ()                     { pass_int();    _jni_offset++; _offset++;       }
+		void do_char  ()                     { pass_int();    _jni_offset++; _offset++;       }
 #ifdef _LP64
-  void do_long  ()                     { pass_long();   _jni_offset++; _offset += 2;    }
+		void do_float ()                     { pass_float();  _jni_offset++; _offset++;       }
+		void do_double()                     { pass_double(); _jni_offset++; _offset += 2;    }
 #else
-  void do_long  ()                     { pass_long();   _jni_offset += 2; _offset += 2; }
+		void do_float ()                     { pass_int();    _jni_offset++; _offset++;       }
+		void do_double()                     { pass_double(); _jni_offset += 2; _offset += 2; }
 #endif
-  void do_void  ()                     { ShouldNotReachHere();                               }
-  void do_object(int begin, int end)   { pass_object(); _jni_offset++; _offset++;        }
-  void do_array (int begin, int end)   { pass_object(); _jni_offset++; _offset++;        }
+		void do_byte  ()                     { pass_int();    _jni_offset++; _offset++;       }
+		void do_short ()                     { pass_int();    _jni_offset++; _offset++;       }
+		void do_int   ()                     { pass_int();    _jni_offset++; _offset++;       }
+#ifdef _LP64
+		void do_long  ()                     { pass_long();   _jni_offset++; _offset += 2;    }
+#else
+		void do_long  ()                     { pass_long();   _jni_offset += 2; _offset += 2; }
+#endif
+		void do_void  ()                     { ShouldNotReachHere();                               }
+		void do_object(int begin, int end)   { pass_object(); _jni_offset++; _offset++;        }
+		void do_array (int begin, int end)   { pass_object(); _jni_offset++; _offset++;        }
 
- public:
-  methodHandle method() const          { return _method; }
-  int          offset() const          { return _offset; }
-  int      jni_offset() const          { return _jni_offset + _prepended; }
-//  int     java_offset() const          { return method()->size_of_parameters() - _offset - 1; }
-  bool      is_static() const          { return method()->is_static(); }
-  virtual void pass_int()              = 0;
-  virtual void pass_long()             = 0;
-  virtual void pass_object()           = 0;
+	public:
+		methodHandle method() const          { return _method; }
+		int          offset() const          { return _offset; }
+		int      jni_offset() const          { return _jni_offset + _prepended; }
+		//  int     java_offset() const          { return method()->size_of_parameters() - _offset - 1; }
+		bool      is_static() const          { return method()->is_static(); }
+		virtual void pass_int()              = 0;
+		virtual void pass_long()             = 0;
+		virtual void pass_object()           = 0;
 #ifdef _LP64
-  virtual void pass_float()            = 0;
-  virtual void pass_double()           = 0;
+		virtual void pass_float()            = 0;
+		virtual void pass_double()           = 0;
 #else
-  virtual void pass_double()           { pass_long(); }  // may be same as long
+		virtual void pass_double()           { pass_long(); }  // may be same as long
 #endif
 
-  NativeSignatureIterator(methodHandle method) : SignatureIterator(method->signature()) {
-    _method = method;
-    _offset = 0;
-    _jni_offset = 0;
+		NativeSignatureIterator(methodHandle method) : SignatureIterator(method->signature()) {
+			_method = method;
+			_offset = 0;
+			_jni_offset = 0;
 
-    const int JNIEnv_words = 1;
-    const int mirror_words = 1;
-    _prepended = !is_static() ? JNIEnv_words : JNIEnv_words + mirror_words;
-  }
+			const int JNIEnv_words = 1;
+			const int mirror_words = 1;
+			_prepended = !is_static() ? JNIEnv_words : JNIEnv_words + mirror_words;
+		}
 
-  // iterate() calles the 2 virtual methods according to the following invocation syntax:
-  //
-  // {pass_int | pass_long | pass_object}
-  //
-  // Arguments are handled from left to right (receiver first, if any).
-  // The offset() values refer to the Java stack offsets but are 0 based and increasing.
-  // The java_offset() values count down to 0, and refer to the Java TOS.
-  // The jni_offset() values increase from 1 or 2, and refer to C arguments.
+		// iterate() calls the 2 virtual methods according to the following invocation syntax:
+		//
+		// {pass_int | pass_long | pass_object}
+		//
+		// Arguments are handled from left to right (receiver first, if any).
+		// The offset() values refer to the Java stack offsets but are 0 based and increasing.
+		// The java_offset() values count down to 0, and refer to the Java TOS.
+		// The jni_offset() values increase from 1 or 2, and refer to C arguments.
 
-  void iterate() { iterate(Fingerprinter(method()).fingerprint());
-  }
+		void iterate() { iterate(Fingerprinter(method()).fingerprint());
+		}
 
 
-  // Optimized path if we have the bitvector form of signature
-  void iterate( uint64_t fingerprint ) {
+		// Optimized path if we have the bitvector form of signature
+		void iterate( uint64_t fingerprint ) {
 
-    if (!is_static()) {
-      // handle receiver (not handled by iterate because not in signature)
-      pass_object(); _jni_offset++; _offset++;
-    }
+			if (!is_static()) {
+				// handle receiver (not handled by iterate because not in signature)
+				pass_object(); _jni_offset++; _offset++;
+			}
 
-    SignatureIterator::iterate_parameters( fingerprint );
-  }
+			SignatureIterator::iterate_parameters( fingerprint );
+		}
 };
 
 
 // Handy stream for iterating over signature
 
 class SignatureStream : public StackObj {
- private:
-  symbolHandle _signature;
-  int          _begin;
-  int          _end;
-  BasicType    _type;
-  bool         _at_return_type;
+	private:
+		symbolHandle _signature;
+		int          _begin;
+		int          _end;
+		BasicType    _type;
+		bool         _at_return_type;
 
- public:
-  bool at_return_type() const                    { return _at_return_type; }
-  bool is_done() const;
-  void next_non_primitive(int t);
-  void next() {
-    symbolOop sig = _signature();
-    int len = sig->utf8_length();
-    if (_end >= len) {
-      _end = len + 1;
-      return;
-    }
+	public:
+		bool at_return_type() const                    { return _at_return_type; }
+		bool is_done() const;
+		void next_non_primitive(int t);
+		void next() {
+			symbolOop sig = _signature();
+			int len = sig->utf8_length();
+			if (_end >= len) {
+				_end = len + 1;
+				return;
+			}
 
-    _begin = _end;
-    int t = sig->byte_at(_begin);
-    switch (t) {
-      case 'B': _type = T_BYTE;    break;
-      case 'C': _type = T_CHAR;    break;
-      case 'D': _type = T_DOUBLE;  break;
-      case 'F': _type = T_FLOAT;   break;
-      case 'I': _type = T_INT;     break;
-      case 'J': _type = T_LONG;    break;
-      case 'S': _type = T_SHORT;   break;
-      case 'Z': _type = T_BOOLEAN; break;
-      case 'V': _type = T_VOID;    break;
-      default : next_non_primitive(t);
-                return;
-    }
-    _end++;
-  }
+			_begin = _end;
+			int t = sig->byte_at(_begin);
+			switch (t) {
+				case 'B': _type = T_BYTE;    break;
+				case 'C': _type = T_CHAR;    break;
+				case 'D': _type = T_DOUBLE;  break;
+				case 'F': _type = T_FLOAT;   break;
+				case 'I': _type = T_INT;     break;
+				case 'J': _type = T_LONG;    break;
+				case 'S': _type = T_SHORT;   break;
+				case 'Z': _type = T_BOOLEAN; break;
+				case 'V': _type = T_VOID;    break;
+				default : next_non_primitive(t);
+					  return;
+			}
+			_end++;
+		}
 
-  SignatureStream(symbolHandle signature,
-                  bool is_method = true) :
-                   _signature(signature), _at_return_type(false) {
-    _begin = _end = (is_method ? 1 : 0);  // skip first '(' in method signatures
-    next();
-  }
+		SignatureStream(symbolHandle signature,
+				bool is_method = true) :
+			_signature(signature), _at_return_type(false) {
+				_begin = _end = (is_method ? 1 : 0);  // skip first '(' in method signatures
+				next();
+			}
 
-  bool is_object() const;                        // True if this argument is an object
-  bool is_array() const;                         // True if this argument is an array
-  BasicType type() const                         { return _type; }
-  symbolOop as_symbol(TRAPS);
+		bool is_object() const;                        // True if this argument is an object
+		bool is_array() const;                         // True if this argument is an array
+		BasicType type() const                         { return _type; }
+		symbolOop as_symbol(TRAPS);
 
-  // return same as_symbol except allocation of new symbols is avoided.
-  symbolOop as_symbol_or_null();
+		// return same as_symbol except allocation of new symbols is avoided.
+		symbolOop as_symbol_or_null();
 };
 
 class SignatureVerifier : public StackObj {
-  public:
-    // Returns true if the symbol is valid method or type signature
-    static bool is_valid_signature(symbolHandle sig);
+	public:
+		// Returns true if the symbol is valid method or type signature
+		static bool is_valid_signature(symbolHandle sig);
 
-    static bool is_valid_method_signature(symbolHandle sig);
-    static bool is_valid_type_signature(symbolHandle sig);
-  private:
+		static bool is_valid_method_signature(symbolHandle sig);
+		static bool is_valid_type_signature(symbolHandle sig);
+	private:
 
-    static ssize_t is_valid_type(const char*, ssize_t);
-    static bool invalid_name_char(char);
+		static ssize_t is_valid_type(const char*, ssize_t);
+		static bool invalid_name_char(char);
 };
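Most of the signature.hpp hunk is a tabs-for-spaces re-indentation; the functional change is the pair of MIPS32-guarded access-specifier switches in NativeSignatureIterator, which expose _offset, _prepended and _jni_offset to subclasses on that port. A hedged sketch of the kind of subclass this enables; the class name and the record() helper are hypothetical, and only the protected fields plus the pure-virtual pass_* hooks come from the header above:

    #ifdef MIPS32
    class MipsArgumentOffsetSketch : public NativeSignatureIterator {
     public:
      MipsArgumentOffsetSketch(methodHandle m) : NativeSignatureIterator(m) {}
      void pass_int()    { record(); }
      void pass_long()   { record(); }
      void pass_object() { record(); }
     private:
      // With the MIPS32 'protected:' above, the running offsets are visible here.
      void record() { /* e.g. map _offset (Java slot) onto _jni_offset (o32 C slot) */ }
    };
    #endif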
--- a/hotspot/src/share/vm/runtime/thread.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -2888,7 +2888,6 @@
     *canTryAgain = false; // don't let caller call JNI_CreateJavaVM again
     return JNI_ENOMEM;
   }
-
   // Enable guard page *after* os::create_main_thread(), otherwise it would
   // crash Linux VM, see notes in os_linux.cpp.
   main_thread->create_stack_guard_pages();
@@ -2897,6 +2896,7 @@
   ObjectSynchronizer::Initialize() ;
 
   // Initialize global modules
+
   jint status = init_globals();
   if (status != JNI_OK) {
     delete main_thread;
@@ -2941,7 +2941,6 @@
 
   assert (Universe::is_fully_initialized(), "not initialized");
   EXCEPTION_MARK;
-
   // At this point, the Universe is initialized, but we have not executed
   // any byte code.  Now is a good time (the only time) to dump out the
   // internal state of the JVM for sharing.
@@ -2954,17 +2953,14 @@
   // Always call even when there are not JVMTI environments yet, since environments
   // may be attached late and JVMTI must track phases of VM execution
   JvmtiExport::enter_start_phase();
-
   // Notify JVMTI agents that VM has started (JNI is up) - nop if no agents.
   JvmtiExport::post_vm_start();
 
   {
     TraceTime timer("Initialize java.lang classes", TraceStartupTime);
-
     if (EagerXrunInit && Arguments::init_libraries_at_startup()) {
       create_vm_init_libraries();
     }
-
     if (InitializeJavaLangString) {
       initialize_class(vmSymbolHandles::java_lang_String(), CHECK_0);
     } else {
@@ -2973,19 +2969,20 @@
 
     if (AggressiveOpts) {
       {
-        // Forcibly initialize java/util/HashMap and mutate the private
-        // static final "frontCacheEnabled" field before we start creating instances
-#ifdef ASSERT
+        // Forcibly initialize java/util/HashMap and mutate the private
+        // static final "frontCacheEnabled" field before we start creating instances
+#ifdef ASSERT
         klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0);
-        assert(tmp_k == NULL, "java/util/HashMap should not be loaded yet");
+        assert(tmp_k == NULL, "java/util/HashMap should not be loaded yet");
 #endif
-        klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0);
+        klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_util_HashMap(),
+            Handle(), Handle(), CHECK_0);
         KlassHandle k = KlassHandle(THREAD, k_o);
-        guarantee(k.not_null(), "Must find java/util/HashMap");
+        guarantee(k.not_null(), "Must find java/util/HashMap");
         instanceKlassHandle ik = instanceKlassHandle(THREAD, k());
         ik->initialize(CHECK_0);
         fieldDescriptor fd;
-        // Possible we might not find this field; if so, don't break
+        // Possible we might not find this field; if so, don't break
         if (ik->find_local_field(vmSymbols::frontCacheEnabled_name(), vmSymbols::bool_signature(), &fd)) {
           k()->bool_field_put(fd.offset(), true);
         }
@@ -3117,7 +3114,6 @@
 
   // Signal Dispatcher needs to be started before VMInit event is posted
   os::signal_init();
-
   // Start Attach Listener if +StartAttachListener or it can't be started lazily
   if (!DisableAttachMechanism) {
     if (StartAttachListener || AttachListener::init_at_startup()) {
@@ -3489,6 +3485,7 @@
   TraceRuntimeCalls = false;
 #endif
 
+
   VM_Exit::set_vm_exited();
 
   notify_vm_shutdown();
--- a/hotspot/src/share/vm/runtime/vframe.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/vframe.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -381,7 +381,8 @@
   // we are using the performance analyzer.
   // Disable this assert when testing the analyzer with fastdebug.
   // -XX:SuppressErrorAt=vframe.cpp:XXX (XXX=following line number)
-  assert(false, "invalid bci or invalid scope desc");
+  //assert(false, "invalid bci or invalid scope desc");
+  printf("invalid bci or invalid scope desc\n");
 }
 
 // top-frame will be skipped
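The change above trades a hard assert for a bare printf so the partially ported VM can keep running past bad bci/scope-desc lookups instead of stopping. If the diagnostic is meant to stay, HotSpot's own output stream would be the more conventional sink (sketch only, not part of the changeset):

    tty->print_cr("invalid bci or invalid scope desc");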
--- a/hotspot/src/share/vm/runtime/vm_version.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/runtime/vm_version.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -145,7 +146,8 @@
 #define CPU      IA32_ONLY("x86")                \
                  IA64_ONLY("ia64")               \
                  AMD64_ONLY("amd64")             \
-                 SPARC_ONLY("sparc")
+                 SPARC_ONLY("sparc")             \
+                 MIPS_ONLY("mips")
 
 const char *Abstract_VM_Version::vm_platform_string() {
   return OS "-" CPU;
--- a/hotspot/src/share/vm/utilities/debug.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/utilities/debug.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,13 +38,21 @@
       }                                                              \
     }
 #else
-  #define assert(p,msg)                                          \
+  #define assert(p, msg)                                                              \
+  {                                                                                   \
+    if (!(p)) {                                                                       \
+      printf("%s, %d, %s\n", __FILE__, __LINE__, "assert(" XSTR(p) ",\"" msg "\")");  \
+    }                                                                                 \
+  }
+#endif
+ 
+/*  #define assert(p,msg)                                          \
     if (!(p)) {                                                  \
-      report_assertion_failure(__FILE__, __LINE__,               \
-                              "assert(" XSTR(p) ",\"" msg "\")");\
-      BREAKPOINT;                                                \
-    }
-#endif
+	printf("%s, %d , %s\n", __FILE__, __LINE__, "assert(" XSTR(p) ",\"" msg "\")");\
+  report_assertion_failure(__FILE__, __LINE__,               \
+                             "assert(" XSTR(p) ",\"" msg "\")");\
+        }*/
+//#endif
 
 // This version of assert is for use with checking return status from
 // library calls that return actual error values eg. EINVAL,
@@ -100,6 +109,9 @@
 // cheap tests that catch errors that would otherwise be hard to find
 // guarantee is also used for Verify options.
 #define guarantee(b,msg)         { if (!(b)) fatal("guarantee(" XSTR(b) ",\"" msg "\")"); }
+//#define guarantee(b,msg) do {                   \
+//   if (!(b)) fatal2("guarantee(%s,\"%s\")", XSTR(b), msg);   \
+//} while (0)
 
 #define ShouldNotCallThis()      { report_should_not_call        (__FILE__, __LINE__); BREAKPOINT; }
 #define ShouldNotReachHere()     { report_should_not_reach_here  (__FILE__, __LINE__); BREAKPOINT; }
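The redefined assert above logs through printf and falls through instead of calling report_assertion_failure() and hitting BREAKPOINT, which keeps a half-ported build moving across known failures; as with the original macro, msg must be a string literal because it is spliced into the message by literal concatenation. A standalone sketch of the same log-and-continue shape, where my_assert, STR and XSTR are local stand-ins for the real macros:

    #include <cstdio>

    #define STR(a)  #a
    #define XSTR(a) STR(a)

    // Log-and-continue assert in the spirit of the redefinition above.
    #define my_assert(p, msg)                                             \
      { if (!(p)) {                                                       \
          std::printf("%s, %d, %s\n", __FILE__, __LINE__,                 \
                      "assert(" XSTR(p) ",\"" msg "\")");                 \
      } }

    int main() {
      my_assert(2 + 2 == 5, "arithmetic holds");  // prints a diagnostic, then continues
      return 0;
    }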
--- a/hotspot/src/share/vm/utilities/macros.hpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/utilities/macros.hpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -186,6 +187,36 @@
 #define NOT_SPARC(code) code
 #endif
 
+#if defined(MIPS32)
+    #define MIPS32_ONLY(code) code
+    #define MIPS64_ONLY(code)
+    #define MIPS_ONLY(code) code
+    #define NOT_MIPS(code)
+    #define NOT_MIPS32(code)
+    #define NOT_MIPS64(code) code
+#elif defined(MIPS64)
+    #define MIPS32_ONLY(code)
+    #define MIPS64_ONLY(code) code
+    #define MIPS_ONLY(code) code
+    #define NOT_MIPS(code)
+    #define NOT_MIPS32(code) code
+    #define NOT_MIPS64(code)
+#else
+    #define MIPS32_ONLY(code)
+    #define MIPS64_ONLY(code)
+    #define MIPS_ONLY(code)
+    #define NOT_MIPS32(code) code
+    #define NOT_MIPS64(code) code
+    #define NOT_MIPS(code) code
+#endif
+    
+#ifdef MIPS64
+
+#else
+#define MIPS64_ONLY(code)
+#define NOT_MIPS64(code) code
+#endif
+
 #define FIX_THIS(code) report_assertion_failure("FIX_THIS",__FILE__, __LINE__, "")
 
 #define define_pd_global(type, name, value) const type pd_##name = value;
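The new MIPS selectors follow the existing *_ONLY / NOT_* pattern, which is what lets the vm_version.cpp hunk earlier in this changeset append MIPS_ONLY("mips") to the CPU string. The trailing #ifdef MIPS64 block looks redundant: every case it covers is already handled by the three-way #if/#elif/#else above, and its re-definitions of MIPS64_ONLY and NOT_MIPS64 are identical to the earlier ones, so it is harmless but dead weight. A standalone sketch of how the selection pattern composes at a use site; MIPS32 is force-defined here purely for the demo:

    #include <cstdio>

    #define MIPS32 1                     // demo only: pretend this is the 32-bit MIPS build
    #if defined(MIPS32)
      #define MIPS_ONLY(code) code
      #define NOT_MIPS(code)
    #else
      #define MIPS_ONLY(code)
      #define NOT_MIPS(code) code
    #endif

    int main() {
      const char* platform = "linux-" MIPS_ONLY("mips") NOT_MIPS("unknown");
      std::printf("%s\n", platform);     // prints "linux-mips"
      return 0;
    }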
--- a/hotspot/src/share/vm/utilities/xmlstream.cpp	Thu Sep 30 13:42:53 2010 +0800
+++ b/hotspot/src/share/vm/utilities/xmlstream.cpp	Thu Sep 30 13:48:16 2010 +0800
@@ -1,5 +1,6 @@
 /*
  * Copyright 2002-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2010 Lemote, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -328,7 +329,7 @@
 // ------------------------------------------------------------------
 void xmlStream::va_done(const char* format, va_list ap) {
   char buffer[200];
-  guarantee(strlen(format) + 10 < sizeof(buffer), "bigger format buffer")
+  guarantee(strlen(format) + 10 < sizeof(buffer), "bigger format buffer");
   const char* kind = format;
   const char* kind_end = strchr(kind, ' ');
   size_t kind_len = (kind_end != NULL) ? (kind_end - kind) : strlen(kind);
--- a/jdk/make/common/shared/Compiler-gcc.gmk	Thu Sep 30 13:42:53 2010 +0800
+++ b/jdk/make/common/shared/Compiler-gcc.gmk	Thu Sep 30 13:48:16 2010 +0800
@@ -76,10 +76,12 @@
     REQUIRED_GCC_VER = 4.0.*
   else
   ifeq ($(ARCH_DATA_MODEL), 32)
-    # i586
-    REQUIRED_CC_VER = 3.2
-    REQUIRED_GCC_VER = 3.2.1*
-    REQUIRED_GCC_VER_INT = 3.2.1-7a
+	#REQUIRED_CC_VER = 3.2
+	#REQUIRED_GCC_VER = 3.2.1*
+	#REQUIRED_GCC_VER_INT = 3.2.1-7a
+	REQUIRED_CC_VER = 4.1
+	REQUIRED_GCC_VER = 4.1.1*
+	REQUIRED_GCC_VER_INT = 4.1.2-*
   else
   ifeq ($(ARCH), amd64)
     # amd64
--- a/make/hotspot-rules.gmk	Thu Sep 30 13:42:53 2010 +0800
+++ b/make/hotspot-rules.gmk	Thu Sep 30 13:48:16 2010 +0800
@@ -64,7 +64,7 @@
 # Basic hotspot build and export of it's files
 #
 
-HOTSPOT_TARGET = all_product
+HOTSPOT_TARGET = product1
 ifeq ($(DEBUG_NAME), debug)
   HOTSPOT_TARGET = all_debug
 endif