changeset 2949:73a07d24174e

RTC Thumb2 JIT enhancements. 2012-05-16 Andrew Haley <aph@redhat.com> * src/cpu/zero/vm/thumb2.cpp: Throughout: T2EE_PRINT_* renamed to T2_PRINT_*. Route all debug info to stderr. We now do frameless compilation, so do all frame accesses relative to SP. Remove zombie detection pass. Remove dead code. Add OSPACE option. (H_LDC_W) (H_INVOKESTATIC_RESOLVED, H_INVOKESPECIAL_RESOLVED) (H_INVOKEVIRTUAL_RESOLVED, H_INVOKEVFINAL, H_MONITORENTER) (H_MONITOREXIT): New. (T2_* macros): Rename from T2EE_*. (SLOW_ENTRY_OFFSET, FAST_ENTRY_OFFSET): New. (THUMB2_CODEBUF_SIZE): Make this depend on PRODUCT. (H_GETSTATIC, H_PUTSTATIC, H_JSR, H_ZOMBIE, H_MONITOR): Delete. (H_DEADCODE, H_LDC_W, H_INVOKESTATIC_RESOLVED) (H_INVOKESPECIAL_RESOLVED, H_INVOKEVIRTUAL_RESOLVED) (H_INVOKEVFINAL): New. (DebugSwitch): New. (JAZ_V6): New local register. (Thumb2_pass1): Count reads and writes for register allocator. Delete zombie detection pass. (Thumb2_RegAlloc): New. (out_align, out_align_offset, nop_16, nop_32): New. (fullBarrier, storeBarrier): Check for an MP OS. (load_local, store_local): Check for an istate reg. (load_istate, store_istate): New. (Thumb2_Load, Thumb2_LoadX2): Remove monitor stack adj. (Thumb2_Store, Thumb2_StoreX2): Likewise. (Debug): New. (Thumb2_save_locals, Thumb2_restore_locals, Thumb2_invoke_save) (Thumb2_invoke_restore, Thumb2_Exit): Remove monitor stackdepth adj. Move here from below. (Thumb2_Accessor): Rewrite for new method header layout. (Thumb2_Enter): Likewise. Do frameless setup. (Thumb2_load_long): Use a single ldrexd instruction. (Thumb2_codegen): Align branches. Call Debug if we're about to enter a synchronized method. (opc_jsr): Add handler. (opc_monitorenter, opc_monitorexit): Call handler instead of generating code. (Thumb2_Initialize): Disassemble ARM and Thumb code separately. * src/cpu/zero/vm/cppInterpreter_arm.S: Throughout: the thread pointer is now in a register, so use it everywhere. 
Set the thread pointer register at every entry point to the interpreter. Throughout: use the macros SLOW_ENTRY and FAST_ENTRY rather than ALIGN_CODE. Throughout: register tmp2 is no longer available, use other registers as appropriate. (T2JIT): Rename from THUMB2EE. (call_thumb2): Load all the thumb2 registers that have been allocated to locals. (accessor_entry): Check for stack overflow. (.fast_accessor_*): Delete dead code. (LOAD_FRAME): New. (Thumb2_invokevfinalresolved_stub) (Thumb2_invokevirtualresolved_stub): New. (Thumb2_invokestaticresolved_stub): New. (Thumb2_invokespecialresolved_stub): New. (Thumb2 stubs): Use FRAME_* rather than ISTATE_*; the frame pointer is no longer in a fixed register. (JAZ_REGSET, JAZ_*): Move declaration of JAZ registers here. (Thumb2_monitorenter): New. (normal_entry_synchronized): Rearrange so that we can load the thread pointer without exceeding the number of instructions that we can fit into the gap between SLOW_ENTRY and FAST_ENTRY. (normal_entry): Likewise. (MP_COMPILE_THRESHOLD, UP_COMPILE_THRESHOLD): Adjust. (TBIT): New. (FRAME_METHOD, FRAME_CONSTANTS, FRAME_BCP, FRAME_STACK_LIMIT) (FRAME_LOCALS, FRAME_STACK): New. (SLOW_ENTRY, FAST_ENTRY, SLOW_ENTRY_OFFSET, FAST_ENTRY_OFFSET): New. (LOAD_ISTATE): New. (DECACHE_STACK_USING_FRAME, DECACHE_JPC_USING_FRAME): New. (TRACE): Save and restore IP. Pass istate to my_trace(). (Opcode monitorenter): Remove all the assembler code and replace it with a call to Helper_monitorenter. * src/cpu/zero/vm/bytecodes_arm.def: Throughout: register tmp2 is no longer available, use other registers as appropriate. (lgetfield): Use ldrexd to load a jlong rather than an ldrexd/strexd loop. * src/cpu/zero/vm/asm_helper.cpp (ARCH_THUMB2): Renamed from ARCH_THUMBEE. (my_trace): New. (Helper_monitorenter): New. (Helper_monitorexit): New.
author aph
date Thu, 17 May 2012 13:45:50 -0400
parents 85de6921e39e
children f293db049783
files src/cpu/zero/vm/asm_helper.cpp src/cpu/zero/vm/bytecodes_arm.def src/cpu/zero/vm/cppInterpreter_arm.S src/cpu/zero/vm/thumb2.cpp
diffstat 4 files changed, 2725 insertions(+), 3053 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/zero/vm/asm_helper.cpp	Wed May 16 11:21:07 2012 +0100
+++ b/src/cpu/zero/vm/asm_helper.cpp	Thu May 17 13:45:50 2012 -0400
@@ -19,7 +19,7 @@
 
 #ifdef __arm__
 
-#define	ARCH_THUMBEE	(1<<16)
+#define	ARCH_THUMB2	(1<<16)
 #define ARCH_VFP	(1<<17)
 #define ARCH_CLZ	(1<<18)
 
@@ -50,6 +50,31 @@
 
 #define VECBUFF_SIZE 64
 
+static char valuebuf[128];
+
+// Format "<name+sig>: <bcp> (bci N)" into static valuebuf.  Not multi-thread safe.
+extern "C" char*
+meth(interpreterState istate) {
+  istate->method()->name_and_sig_as_C_string(valuebuf, sizeof valuebuf);
+  size_t used = strlen(valuebuf);  // the suffix may only use what is left of valuebuf
+  snprintf(valuebuf + used, sizeof valuebuf - used, ": " PTR_FORMAT " (bci %d)",
+	   (intptr_t) istate->bcp(),
+	   istate->method()->bci_from(istate->bcp()));
+  return valuebuf;
+}
+
+// Debugging hook: the TRACE macro in cppInterpreter_arm.S calls this routine.
+// The nop below runs only when the current method's name starts with NAME1,
+// so a breakpoint on it traps on that one method.  jpc is not used here.
+#define NAME1 "sun.nio.ch.FileChannelImpl$Unmapper.run()V"
+#define EQ(S1, S2) ((S1) && (strncmp((S1), (S2), strlen(S2)) == 0))
+extern "C" void my_trace(void *jpc, void *istate)
+{
+  char *name = meth((interpreterState)istate);
+  if (EQ(name, NAME1))
+    asm volatile("nop");  // Somewhere to put a breakpoint
+}
+
 extern "C" unsigned hwcap(void)
 {
   int fd;
@@ -70,7 +95,7 @@
       unsigned value = *p++;
       if (tag == 0) goto fini;
       if (tag == AT_HWCAP) {
-	if (value & HWCAP_THUMBEE) rc |= ARCH_THUMBEE;
+	if (value & HWCAP_THUMBEE) rc |= ARCH_THUMB2;
 	if (value & HWCAP_VFP) rc |= ARCH_VFP;
       } else if (tag == AT_PLATFORM) {
 	const char *s = (const char *)value;
@@ -87,7 +112,7 @@
   close(fd);
 //  printf("arch = %d, rc = 0x%08x\n", arch, rc);
   if (arch >= 5) rc |= ARCH_CLZ;
-  if (arch >= 7) rc |= ARCH_THUMBEE;
+  if (arch >= 7) rc |= ARCH_THUMB2;
   return rc | (1<<arch);
 }
 
@@ -229,6 +254,89 @@
     return istate->thread()->pending_exception();
 }
 
+extern "C" oop Helper_monitorenter(interpreterState istate, oop lockee)  // lock lockee for this frame; returns thread->pending_exception()
+{
+    BasicObjectLock* limit = istate->monitor_base();
+    BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
+    BasicObjectLock* entry = NULL;  // monitor slot to use; stays NULL until a free one is seen
+    markOop displaced;  // declared up front because the gotos below jump past its first assignment
+    JavaThread *thread = istate->thread();
+
+    if (lockee == NULL) {  // null lockee: HELPER_THROW a NullPointerException and bail out
+      HELPER_THROW(istate->thread(), vmSymbols::java_lang_NullPointerException(), "");
+      goto handle_exception;
+    }
+    while (most_recent != limit ) {  // walk the monitor slots from stack_base up to monitor_base
+      if (most_recent->obj() == NULL) entry = most_recent;  // remember a free slot
+      else if (most_recent->obj() == lockee) break;  // stop at a slot that already holds lockee
+      most_recent++;
+    }
+    if (entry == NULL) {  // no free slot was seen: make room for another monitor
+      int monitor_words = frame::interpreter_frame_monitor_size();
+      ZeroStack *stack = thread->zero_stack();
+
+      if (monitor_words > stack->available_words()) {  // not enough Zero stack left for the new slot
+        InterpreterRuntime::throw_StackOverflowError(thread);
+	goto handle_exception;
+      } else {
+	stack->alloc(monitor_words * wordSize);
+
+	for (intptr_t *p = istate->stack() + 1; p < istate->stack_base(); p++)  // slide the words between stack and stack_base down by monitor_words
+	  *(p - monitor_words) = *p;
+
+	istate->set_stack_limit(istate->stack_limit() - monitor_words);
+	istate->set_stack(istate->stack() - monitor_words);
+	istate->set_stack_base(istate->stack_base() - monitor_words);
+
+	entry = (BasicObjectLock *) istate->stack_base();  // the new slot starts at the lowered stack_base
+      }
+    }
+    entry->set_obj(lockee);
+    displaced = lockee->mark()->set_unlocked();
+    entry->lock()->set_displaced_header(displaced);
+    if (Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {  // try to swap entry into the object's mark word
+      // Is it simple recursive case?
+      if (thread->is_lock_owned((address) displaced->clear_lock_bits())) {
+	entry->lock()->set_displaced_header(NULL);  // Helper_monitorexit skips the mark-word restore when this is NULL
+      } else {
+	InterpreterRuntime::monitorenter(thread, entry);  // neither case applied: hand off to the runtime
+      }
+    }
+handle_exception:
+    return thread->pending_exception();
+}
+
+extern "C" oop Helper_monitorexit(interpreterState istate, oop lockee)  // unlock lockee for this frame; returns thread->pending_exception()
+{
+    BasicObjectLock* limit = istate->monitor_base();
+    BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
+    JavaThread *thread = istate->thread();
+
+    if (lockee == NULL) {  // null lockee: HELPER_THROW a NullPointerException and bail out
+      HELPER_THROW(istate->thread(), vmSymbols::java_lang_NullPointerException(), "");
+      goto handle_exception;
+    }
+    while (most_recent != limit ) {  // walk the monitor slots from stack_base up to monitor_base
+      if ((most_recent)->obj() == lockee) {  // first slot holding lockee
+	BasicLock* lock = most_recent->lock();
+	markOop header = lock->displaced_header();
+	most_recent->set_obj(NULL);  // free the slot
+	if (header != NULL) {  // a NULL header means there is no saved mark word to put back
+	  if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {  // try to swap the saved header back into the mark word
+	    // restore object for the slow case
+	    most_recent->set_obj(lockee);
+	    InterpreterRuntime::monitorexit(thread, most_recent);
+	  }
+	}
+	return thread->pending_exception();
+      }
+      most_recent++;
+    }
+    InterpreterRuntime::throw_illegal_monitor_state_exception(thread);  // lockee was not found in any slot
+handle_exception:
+    return thread->pending_exception();
+}
+
 extern "C" oop Helper_aastore(interpreterState istate, oop value, int index, arrayOop arrayref)
 {
     if (arrayref == NULL) {
@@ -512,7 +620,7 @@
 int main(void)
 {
 	print_def("ARCH_VFP",			ARCH_VFP);
-	print_def("ARCH_THUMBEE",		ARCH_THUMBEE);
+	print_def("ARCH_THUMB2",		ARCH_THUMB2);
 	print_def("ARCH_CLZ",			ARCH_CLZ);
 	nl();
 	print_def("JVM_CONSTANT_Utf8",		JVM_CONSTANT_Utf8);
--- a/src/cpu/zero/vm/bytecodes_arm.def	Wed May 16 11:21:07 2012 +0100
+++ b/src/cpu/zero/vm/bytecodes_arm.def	Thu May 17 13:45:50 2012 -0400
@@ -335,13 +335,13 @@
 
 (lconst_1) lconst_1 {
 	DISPATCH_START_R2
-        mov     tmp2, #1
+        mov     r3, #1
 	DISPATCH_NEXT
         mov     tmp1, #0
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	PUSH	r3, tmp1
 	DISPATCH_FINISH
 }
 
@@ -367,10 +367,10 @@
 	DISPATCH_NEXT
         orr     tmp1, tmp1, #0x00f00000
 	DISPATCH_NEXT
-        mov     tmp2, #0
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+        mov     r3, #0
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
 	DISPATCH_FINISH
 }
 
@@ -419,11 +419,11 @@
 	DISPATCH_START	\seq_len
 	sub	r3, locals, r2, lsl #2
 	DISPATCH_NEXT
-	ldmda	r3, {tmp2, tmp1}
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	ldmda	r3, {r3, tmp1}
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r3, tmp1
 	DISPATCH_FINISH
 }
 
@@ -465,49 +465,49 @@
 
 (lload_0,dload_0) u8load_0 {
 	DISPATCH_START_R2
-        ldmda   locals, {tmp2-tmp1}
+        ldmda   locals, {r3, tmp1}
 	DISPATCH_NEXT
 	PUSH	tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r3
 	DISPATCH_FINISH
 }
 
 (lload_1,dload_1) u8load_1 {
 	DISPATCH_START_R2
-        ldmdb   locals, {tmp2-tmp1}
+        ldmdb   locals, {r3, tmp1}
 	DISPATCH_NEXT
 	PUSH	tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r3
 	DISPATCH_FINISH
 }
 
 (lload_2,dload_2) u8load_2 {
 	DISPATCH_START_R2
-	ldr	tmp2, [locals, #-12]
+	ldr	r3, [locals, #-12]
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, #-8]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	PUSH	r3, tmp1
 	DISPATCH_FINISH
 }
 
 (lload_3,dload_3) u8load_3 {
 	DISPATCH_START_R2
-	ldr	tmp2, [locals, #-16]
+	ldr	r3, [locals, #-16]
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, #-12]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	PUSH	r3, tmp1
 	DISPATCH_FINISH
 }
 
@@ -546,12 +546,12 @@
 	bcs	array_bound_exception_jpc_1
 	DISPATCH_NEXT
 	add	r3, r3, r2, lsl #3
-	ldr	tmp2, [r3, #BASE_OFFSET_LONG]
+	ldr	r1, [r3, #BASE_OFFSET_LONG]
 	DISPATCH_NEXT
 	ldr	tmp1, [r3, #20]
 	DISPATCH_NEXT
 	PUT_STACK	1, tmp1
-	PUT_STACK	0, tmp2
+	PUT_STACK	0, r1
 	DISPATCH_FINISH
 }
 
@@ -634,13 +634,13 @@
 @ r2 = [jpc, #1]
 (lstore,dstore) u8store {
 	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
+	POP	r1, tmp1
 	DISPATCH_NEXT
 	sub	r3, locals, r2, lsl #2
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	stmda	r3, {tmp2, tmp1}
+	stmda	r3, {r1, tmp1}
 	DISPATCH_FINISH
 }
 
@@ -691,30 +691,30 @@
 (lstore_0,dstore_0) u8store_0 {
 	DISPATCH_START_R2
 	DISPATCH_NEXT
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-        stmda   locals, {tmp2, tmp1}
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        stmda   locals, {r1, tmp1}
 	DISPATCH_FINISH
 }
 
 (lstore_1,dstore_1) u8store_1 {
 	DISPATCH_START_R2
 	DISPATCH_NEXT
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-        stmdb   locals, {tmp2, tmp1}
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+        stmdb   locals, {r1, tmp1}
 	DISPATCH_FINISH
 }
 
 (lstore_2,dstore_2) u8store_2 {
 	DISPATCH_START_R2
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-        str     tmp2, [locals, #-12]
+	POP	r1, tmp1
+	DISPATCH_NEXT
+        str     r1, [locals, #-12]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -724,9 +724,9 @@
 
 (lstore_3,dstore_3) u8store_3 {
 	DISPATCH_START_R2
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-        str     tmp2, [locals, #-16]
+	POP	r1, tmp1
+	DISPATCH_NEXT
+        str     r1, [locals, #-16]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -735,72 +735,72 @@
 }
 
 (iastore,fastore) u4astore {
-	POP	r3, tmp2, tmp1		@ r3 = value, tmp2 = index, tmp1 = arrayref
+	POP	r1, tmp1, lr		@ r1 = value, tmp1 = index, lr = arrayref
 	DISPATCH_START_R2
-	SW_NPC	cmp	tmp1, #0
+	SW_NPC	cmp	lr, #0
 	SW_NPC	beq	null_ptr_exception_jpc_1
 .abortentry10:
-	ldr	lr, [tmp1, #8]		@ lr = limit
-	DISPATCH_NEXT
-	cmp	tmp2, lr
-	bcs	array_bound_exception_jpc_1_tmp2
-	DISPATCH_NEXT
-	add	tmp1, tmp1, tmp2, lsl #2
-	DISPATCH_NEXT
-	str	r3, [tmp1, #BASE_OFFSET_WORD]
+	ldr	ip, [lr, #8]		@ ip = limit
+	DISPATCH_NEXT
+	cmp	tmp1, ip
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	lr, lr, tmp1, lsl #2
+	DISPATCH_NEXT
+	str	r1, [lr, #BASE_OFFSET_WORD]
 	DISPATCH_FINISH
 }
 
 (lastore,dastore) u8astore {
-	POP	r1, r3, tmp2, tmp1		@ r1,r3 = value, tmp2 = index, tmp1 = arrayref
+	POP	r1, r3, tmp1, lr		@ r1,r3 = value, tmp1 = index, lr = arrayref
 	DISPATCH_START_R2
-	SW_NPC	cmp	tmp1, #0
+	SW_NPC	cmp	lr, #0
 	SW_NPC	beq	null_ptr_exception_jpc_1
 .abortentry11:
-	ldr	ip, [tmp1, #8]		@ ip = limit
-	cmp	tmp2, ip
-	DISPATCH_NEXT
-	bcs	array_bound_exception_jpc_1_tmp2
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2, lsl #3
-	str	r1, [tmp2, #BASE_OFFSET_LONG]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	r3, [tmp2, #20]
+	ldr	ip, [lr, #8]		@ ip = limit
+	cmp	tmp1, ip
+	DISPATCH_NEXT
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	tmp1, lr, tmp1, lsl #3
+	str	r1, [tmp1, #BASE_OFFSET_LONG]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r3, [tmp1, #BASE_OFFSET_LONG+4]
 	DISPATCH_FINISH
 }
 
 (bastore) bastore {
-	POP	r3, tmp2, tmp1		@ r3 = value, tmp2 = index, tmp1 = arrayref
+	POP	r3, tmp1, lr		@ r3 = value, tmp1 = index, lr = arrayref
 	DISPATCH_START_R2
-	SW_NPC	cmp	tmp1, #0
+	SW_NPC	cmp	lr, #0
 	SW_NPC	beq	null_ptr_exception_jpc_1
 .abortentry12:
-	ldr	lr, [tmp1, #8]		@ lr = limit
-	DISPATCH_NEXT
-	cmp	tmp2, lr
-	bcs	array_bound_exception_jpc_1_tmp2
-	DISPATCH_NEXT
-	add	tmp1, tmp1, tmp2
-	DISPATCH_NEXT
-	strb	r3, [tmp1, #BASE_OFFSET_BYTE]
+	ldr	ip, [lr, #8]		@ ip = limit
+	DISPATCH_NEXT
+	cmp	tmp1, ip
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	lr, lr, tmp1
+	DISPATCH_NEXT
+	strb	r3, [lr, #BASE_OFFSET_BYTE]
 	DISPATCH_FINISH
 }
 
 (castore,sastore) u2astore {
-	POP	r3, tmp2, tmp1		@ r3 = value, tmp2 = index, tmp1 = arrayref
+	POP	r3, tmp1, lr		@ r3 = value, tmp1 = index, lr = arrayref
 	DISPATCH_START_R2
-	SW_NPC	cmp	tmp1, #0
+	SW_NPC	cmp	lr, #0
 	SW_NPC	beq	null_ptr_exception_jpc_1
 .abortentry13:
-	ldr	lr, [tmp1, #8]		@ lr = limit
-	DISPATCH_NEXT
-	cmp	tmp2, lr
-	bcs	array_bound_exception_jpc_1_tmp2
-	DISPATCH_NEXT
-	add	tmp1, tmp1, tmp2, lsl #1
-	DISPATCH_NEXT
-	strh	r3, [tmp1, #BASE_OFFSET_SHORT]
+	ldr	ip, [lr, #8]		@ ip = limit
+	DISPATCH_NEXT
+	cmp	tmp1, ip
+	bcs	array_bound_exception_jpc_1_tmp1
+	DISPATCH_NEXT
+	add	lr, lr, tmp1, lsl #1
+	DISPATCH_NEXT
+	strh	r3, [lr, #BASE_OFFSET_SHORT]
 	DISPATCH_FINISH
 }
 
@@ -884,13 +884,13 @@
 
 (iadd) iadd {
 	DISPATCH_START_R2
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	add	tmp2, tmp2, tmp1
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	add	r1, r1, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -907,13 +907,13 @@
 
 (isub) isub {
 	DISPATCH_START_R2
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -932,11 +932,11 @@
 	DISPATCH_START_R2
 	POP	r2, tmp1
 	DISPATCH_NEXT
-	mul	tmp2, r2, tmp1
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	mul	r1, r2, tmp1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1113,13 +1113,13 @@
 
 (iand) iand {
 	DISPATCH_START_R2
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	and	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	and	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1136,13 +1136,13 @@
 
 (ior) ior {
 	DISPATCH_START_R2
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	orr	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1159,13 +1159,13 @@
 
 (ixor) ixor {
 	DISPATCH_START_R2
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	eor	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1338,78 +1338,48 @@
 	POP	r3
 	ldrb	r1, [jpc, #2]
         cmp     r3, #0
-	beq	1f
+	beq	branch_taken_unsafe
 	DISPATCH 3
-1:
-	mov	r2, r2, lsl #24
-        orr     ip, r1, r2, asr #16
-        ldrb  r0, [jpc, ip]!
-	DISPATCH_BYTECODE
 }
 
 (ifne,ifnonnull) ifne_unsafe {
 	POP	r3
 	ldrb	r1, [jpc, #2]
         cmp     r3, #0
-	bne	1f
+	bne	branch_taken_unsafe
 	DISPATCH 3
-1:
-	mov	r2, r2, lsl #24
-        orr     ip, r1, r2, asr #16
-        ldrb  r0, [jpc, ip]!
-	DISPATCH_BYTECODE
 }
 
 (iflt) iflt_unsafe {
 	POP	r3
 	ldrb	r1, [jpc, #2]
         cmp     r3, #0
-	blt	1f
+	blt	branch_taken_unsafe
 	DISPATCH 3
-1:
-	mov	r2, r2, lsl #24
-        orr     ip, r1, r2, asr #16
-        ldrb  r0, [jpc, ip]!
-	DISPATCH_BYTECODE
 }
 
 (ifge) ifge_unsafe {
 	POP	r3
 	ldrb	r1, [jpc, #2]
         cmp     r3, #0
-	bge	1f
+	bge	branch_taken_unsafe
 	DISPATCH 3
-1:
-	mov	r2, r2, lsl #24
-        orr     ip, r1, r2, asr #16
-        ldrb  r0, [jpc, ip]!
-	DISPATCH_BYTECODE
 }
 
 (ifgt) ifgt_unsafe {
 	POP	r3
 	ldrb	r1, [jpc, #2]
         cmp     r3, #0
-	bgt	1f
+	bgt	branch_taken_unsafe
 	DISPATCH 3
-1:
-	mov	r2, r2, lsl #24
-        orr     ip, r1, r2, asr #16
-        ldrb  r0, [jpc, ip]!
-	DISPATCH_BYTECODE
 }
 
 (ifle) ifle_unsafe {
 	POP	r3
 	ldrb	r1, [jpc, #2]
         cmp     r3, #0
-	ble	1f
+	ble	branch_taken_unsafe
 	DISPATCH 3
-1:
-	mov	r2, r2, lsl #24
-        orr     ip, r1, r2, asr #16
-        ldrb  r0, [jpc, ip]!
-	DISPATCH_BYTECODE
 }
 
 (if_icmpeq,if_acmpeq) if_icmpeq_unsafe {
@@ -1474,9 +1444,9 @@
 
 (jsr) jsr {
 	ldr	r3, [istate, #ISTATE_METHOD]
-	ldr	r1, [r3, #8]
+	ldr	r1, [r3, #METHOD_CONSTMETHOD]
 	rsb	r2, r1, jpc
-	sub	r2, r2, #45
+	sub	r2, r2, #CONSTMETHOD_CODEOFFSET - 3
 	PUSH	r2
 	b	do_goto
 }
@@ -1485,10 +1455,10 @@
 @ r1 = [jpc, #2]
 (ret) ret {
 	ldr	r0, [istate, #ISTATE_METHOD]
-	ldr	r3, [r0, #8]
+	ldr	r3, [r0, #METHOD_CONSTMETHOD]
 	ldr	r1, [locals, -r2, lsl #2]
 	add	jpc, r3, r1
-	DISPATCH	48
+	DISPATCH	CONSTMETHOD_CODEOFFSET
 }
 
 @ We dont do safe and unsafe versions of tableswitch and lookupswitch
@@ -1575,30 +1545,30 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	tmp1
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r3, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry78:
-	ldr	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry78_v:
-	ldr	tmp2, [tmp1, tmp2]
+	ldr	r1, [tmp1, r1]
 	FullBarrier
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1606,30 +1576,30 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	tmp1
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r3, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry79:
-	ldrsb	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldrsb	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry79_v:
-	ldrsb	tmp2, [tmp1, tmp2]
+	ldrsb	r1, [tmp1, r1]
 	FullBarrier
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1637,30 +1607,30 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	tmp1
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r3, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry80:
-	ldrh	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldrh	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry80_v:
-	ldrh	tmp2, [tmp1, tmp2]
+	ldrh	r1, [tmp1, r1]
 	FullBarrier
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1668,30 +1638,30 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	tmp1
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r3, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry81:
-	ldrsh	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldrsh	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry81_v:
-	ldrsh	tmp2, [tmp1, tmp2]
+	ldrsh	r1, [tmp1, r1]
 	FullBarrier
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -1699,40 +1669,37 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	tmp1
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r3, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
+	GO_IF_VOLATILE	r3, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
 	DISPATCH_NEXT
 .abortentry82:
-	ldmia	tmp2, {tmp2, tmp1}
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	ldmia	r1, {r1, tmp1}
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
-	add	tmp2, tmp1, tmp2
+	ldr	r1, [r1, #CP_OFFSET+8]
+	add	r1, tmp1, r1
 #ifndef	__ARM_ARCH_7A__
 .abortentry82_v:
-	ldmia	tmp2, {tmp2, tmp1}
+	ldmia	r1, {r2, r3}
 #else
-	mov	ip, tmp2
 .abortentry82_v:
-	ldrexd	tmp2, tmp1 , [ip]
-	strexd	r2 , tmp2, tmp1, [ip]
-	teq	r2, #0
-	bne	.abortentry82_v
+	ldrexd	r2, r3 , [r1]
 #endif
+	// Be very careful here: you must be certain that
+	// DISPATCH_NEXT does not corrupt R2 or R3.
 	DISPATCH_NEXT
 	FullBarrier
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	PUSH	r2, r3
 	DISPATCH_FINISH
 }
 
@@ -1740,28 +1707,28 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	r3, tmp1		@ r3 = value, tmp1 = object
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r2, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 .abortentry83:
-	str	r3, [tmp1, tmp2]
+	str	r3, [tmp1, r1]
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	StoreStoreBarrier
 .abortentry83_v:
-	str	r3, [tmp1, tmp2]
+	str	r3, [tmp1, r1]
 	StoreLoadBarrier
 	DISPATCH_FINISH
 }
@@ -1770,28 +1737,28 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	r3, tmp1		@ r3 = value, tmp1 = object
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r2, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 .abortentry84:
-	strh	r3, [tmp1, tmp2]
+	strh	r3, [tmp1, r1]
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	StoreStoreBarrier
 .abortentry84_v:
-	strh	r3, [tmp1, tmp2]
+	strh	r3, [tmp1, r1]
 	StoreLoadBarrier
 	DISPATCH_FINISH
 }
@@ -1800,28 +1767,28 @@
 	ldrb	r1, [jpc, #2]
 	DISPATCH_START	3
 	POP	r3, tmp1		@ r3 = value, tmp1 = object
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r2, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 .abortentry85:
-	strb	r3, [tmp1, tmp2]
+	strb	r3, [tmp1, r1]
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	StoreStoreBarrier
 .abortentry85_v:
-	strb	r3, [tmp1, tmp2]
+	strb	r3, [tmp1, r1]
 	StoreLoadBarrier
 	DISPATCH_FINISH
 }
@@ -1829,23 +1796,23 @@
 (aputfield) aputfield {
 	ldrb	r1, [jpc, #2]
 	POP	r3, tmp1		@ r3 = value, tmp1 = object
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r2, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r2, r1, 3f
+	ldr	r1, [r1, #CP_OFFSET+8]
 .abortentry113:
-	str	r3, [tmp1, tmp2]
+	str	r3, [tmp1, r1]
 	mov	r0, tmp1
 	bl	Helper_aputfield
 	DISPATCH 3
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+	ldr	r1, [r1, #CP_OFFSET+8]
 	StoreStoreBarrier
 .abortentry113_v:
-	str	r3, [tmp1, tmp2]
+	str	r3, [tmp1, r1]
 	StoreLoadBarrier
 	mov	r0, tmp1
 	bl	Helper_aputfield
@@ -1854,40 +1821,41 @@
 
 (lputfield) lputfield {
 	ldrb	r1, [jpc, #2]
-	DISPATCH_START	3
 	POP	r3, tmp1, lr		@ r3, tmp1 = value, lr = object
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
-	DISPATCH_NEXT
+        add     r1, constpool, r1, lsl #12
+	add	r1, r1, r2, lsl #4
 	SW_NPC	cmp	lr, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE	r2, tmp2, 3f
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
-	DISPATCH_NEXT
-	add	tmp2, lr, tmp2
-	DISPATCH_NEXT
+	GO_IF_VOLATILE	r2, r1, 3f
+	DISPATCH_START	3
+	ldr	r1, [r1, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	add	r1, lr, r1
 	DISPATCH_NEXT
 .abortentry86:
-	stm	tmp2, {r3, tmp1}
+	stm	r1, {r3, tmp1}
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-	ldr	tmp2, [tmp2, #CP_OFFSET+8]
-	add	tmp2, lr, tmp2
+	ldr	r1, [r1, #CP_OFFSET+8]
+	add	r1, lr, r1
 	StoreStoreBarrier
 #ifndef	__ARM_ARCH_7A__
 .abortentry86_v:
-	stm	tmp2, {r3, tmp1}
+	stm	r1, {r3, tmp1}
 #else
-	mov	ip, tmp2
-	mov	tmp2, r3
-	// Data in tmp1 & tmp2, address in ip, r2 & r3 scratch
+	mov	ip, r1
+	mov	r1, r3
+	// Data in tmp1 & r1, address in ip, r2 & r3 scratch
+	mov	r0, r1
+	mov	r1, tmp1
 .abortentry86_v:
 	ldrexd	r2, r3, [ip]
-	strexd	r2, tmp2, tmp1, [ip]
+	strexd	r2, r0, r1, [ip]
 	teq	r2, #0
 	bne	.abortentry86_v
 #endif
+	DISPATCH_START	3
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	StoreLoadBarrier
@@ -1900,16 +1868,16 @@
 @ r1 = [jpc, #2]
 (getstatic) getstatic {
 	ldrb	r1, [jpc, #2]
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
-        ldr     r3, [tmp2, #CP_OFFSET]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+        ldr     r3, [tmp1, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
 	cmp	r3, #opc_getstatic << 16
 	blne	resolve_get_put
-	GO_IF_VOLATILE	r2, tmp2, 3f
-	ldr	r3, [tmp2, #CP_OFFSET+4]
-	ldr	r2, [tmp2, #CP_OFFSET+12]
-        ldr     lr, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE	r2, tmp1, 3f
+	ldr	r3, [tmp1, #CP_OFFSET+4]
+	ldr	r2, [tmp1, #CP_OFFSET+12]
+        ldr     lr, [tmp1, #CP_OFFSET+8]
         movs    r2, r2, lsr #29
 	bhi	getstatic_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
 	bcs	getstatic_h		@ C = 1 => R2 = 1
@@ -1919,9 +1887,9 @@
 	b	getstatic_sh
 3:
 	VOLATILE_VERSION
-	ldr	r3, [tmp2, #CP_OFFSET+4]
-	ldr	r2, [tmp2, #CP_OFFSET+12]
-        ldr     lr, [tmp2, #CP_OFFSET+8]
+	ldr	r3, [tmp1, #CP_OFFSET+4]
+	ldr	r2, [tmp1, #CP_OFFSET+12]
+        ldr     lr, [tmp1, #CP_OFFSET+8]
         movs    r2, r2, lsr #29
 	bhi	getstatic_volatile_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
 	bcs	getstatic_volatile_h		@ C = 1 => R2 = 1
@@ -1935,16 +1903,16 @@
 @ r1 = [jpc, #2]
 (putstatic) putstatic {
 	ldrb	r1, [jpc, #2]
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
-        ldr     r3, [tmp2, #CP_OFFSET]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+        ldr     r3, [tmp1, #CP_OFFSET]
         and     r3, r3, #0xff000000
         cmp     r3, #opc_putstatic << 24
 	blne	resolve_get_put
-	GO_IF_VOLATILE	r2, tmp2, 3f
-	ldr	r3, [tmp2, #CP_OFFSET+4]		@ r3 = object
-        ldr     lr, [tmp2, #CP_OFFSET+12]           @ lr = tos_type
-        ldr     r2, [tmp2, #CP_OFFSET+8]            @ r2 = offset
+	GO_IF_VOLATILE	r2, tmp1, 3f
+	ldr	r3, [tmp1, #CP_OFFSET+4]		@ r3 = object
+        ldr     lr, [tmp1, #CP_OFFSET+12]           @ lr = tos_type
+        ldr     r2, [tmp1, #CP_OFFSET+8]            @ r2 = offset
 	movs	lr, lr, lsr #29
 	bhi	putstatic_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
 	bcs	putstatic_h		@ C = 1 => R2 = 1
@@ -1954,9 +1922,9 @@
 	b	putstatic_sh
 3:
 	VOLATILE_VERSION
-	ldr	r3, [tmp2, #CP_OFFSET+4]		@ r3 = object
-        ldr     lr, [tmp2, #CP_OFFSET+12]           @ lr = tos_type
-        ldr     r2, [tmp2, #CP_OFFSET+8]            @ r2 = offset
+	ldr	r3, [tmp1, #CP_OFFSET+4]		@ r3 = object
+        ldr     lr, [tmp1, #CP_OFFSET+12]           @ lr = tos_type
+        ldr     r2, [tmp1, #CP_OFFSET+8]            @ r2 = offset
 	movs	lr, lr, lsr #29
 	bhi	putstatic_volatile_w		@ C = 1, Z = 0 => R2 == 3, 5, 7
 	bcs	putstatic_volatile_h		@ C = 1 => R2 = 1
@@ -1970,26 +1938,25 @@
 
 (return) return_unsafe {
 
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
-
-	cmp	tmp1, tmp2
+
+	cmp	tmp1, r9
 	bcc	1f
 2:
 	mov	r3, #0
 
-	ldr	r2, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [r2, #0]
 	ldrh	r0, [r0, #40]
 	add	r1, r2, #4
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 
 	add	stack, r1, r0, lsl #2
 
-	str	stack, [tmp_xxx, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
@@ -2000,27 +1967,26 @@
 
 (ireturn,areturn,freturn) ireturn_unsafe {
 
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
-
-	cmp	tmp1, tmp2
+
+	cmp	tmp1, r9
 	bcc	1f
 2:
 	mov	r3, #0
 
 	POP	r1					@ pop result before we lose stack
 
-	ldr	stack, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [stack, #0]
 	ldrh	r0, [r0, #40]
 
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	r1, [stack, r0, lsl #2]!
 
-	str	stack, [tmp_xxx, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
@@ -2031,28 +1997,27 @@
 
 (lreturn,dreturn) lreturn_unsafe {
 
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
-	cmp	tmp1, tmp2
+	cmp	tmp1, r9
 	bcc	1f
 2:
 	mov	r3, #0
 
-	ldr	r2, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [r2, #0]
 	ldrh	r0, [r0, #40]
 	add	r1, r2, #4
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 
 	POP	r2, r3
 
 	add	stack, r1, r0, lsl #2
 	stmdb	stack!, {r2, r3}
 
-	str	stack, [tmp_xxx, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
@@ -2099,21 +2064,20 @@
 	DISPATCH	0
 2:
 	sub	jpc, jpc, #\seq_len
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
         DECACHE_JPC
         DECACHE_STACK
 	mov	r1, #0
         bl      _ZN18InterpreterRuntime3ldcEP10JavaThreadb
-	ldr	r0, [istate, #ISTATE_THREAD]			@ thread
 	CACHE_CP
-	ldr	r1, [r0, #THREAD_PENDING_EXC]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
 	CACHE_JPC
 	cmp	r1, #0
 	bne	handle_exception
-	ldr	r3, [r0, #THREAD_VM_RESULT]
+	ldr	r3, [thread, #THREAD_VM_RESULT]
 	mov	r2, #0
 	PUSH	r3
-	str	r2, [r0, #THREAD_VM_RESULT]
+	str	r2, [thread, #THREAD_VM_RESULT]
 	DISPATCH	\seq_len
 }
 
@@ -2155,21 +2119,20 @@
 	DISPATCH	0
 2:
 	sub	jpc, jpc, #\seq_len
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
         DECACHE_JPC
         DECACHE_STACK
 	mov	r1, #1
         bl      _ZN18InterpreterRuntime3ldcEP10JavaThreadb
-	ldr	r0, [istate, #ISTATE_THREAD]			@ thread
 	CACHE_CP
-	ldr	r1, [r0, #THREAD_PENDING_EXC]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
 	CACHE_JPC
 	cmp	r1, #0
-	ldr	r3, [r0, #THREAD_VM_RESULT]
+	ldr	r3, [thread, #THREAD_VM_RESULT]
 	bne	handle_exception
 	mov	r2, #0
 	PUSH	r3
-	str	r2, [r0, #THREAD_VM_RESULT]
+	str	r2, [thread, #THREAD_VM_RESULT]
 	DISPATCH	\seq_len
 }
 
@@ -2190,10 +2153,6 @@
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 
-	cmp	tmp1, #JVM_CONSTANT_Long
-	cmpne	tmp1, #JVM_CONSTANT_Double
-	bne	vm_fatal_error
-
 	add	tmp1, r2, r3, lsl #2
 	ldr	r3, [tmp1, #CONSTANTPOOL_BASE]
 	ldr	tmp1, [tmp1, #CONSTANTPOOL_BASE+4]
@@ -2207,70 +2166,70 @@
 (iadd_u4store) {
 	ldrb	r3, [jpc, #2]
 	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
+	POP	r1, tmp1		@ r1 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	add	r1, tmp1, r1		@ r1 = tmp1 + r1
 	DISPATCH_NEXT
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	str	r1, [locals, r3, lsl #2]		@ word at locals + (r3 << 2) = r1; r3 was negated by the rsb above
 	DISPATCH_FINISH
 }
 
 (isub_u4store) {
 	ldrb	r3, [jpc, #2]
 	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
+	POP	r1, tmp1		@ r1 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1		@ r1 = tmp1 - r1
 	DISPATCH_NEXT
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	str	r1, [locals, r3, lsl #2]		@ word at locals + (r3 << 2) = r1; r3 was negated by the rsb above
 	DISPATCH_FINISH
 }
 
 (iand_u4store) {
 	ldrb	r3, [jpc, #2]
 	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	and	tmp2, tmp1, tmp2
+	POP	r1, tmp1		@ r1 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	and	r1, tmp1, r1		@ r1 = tmp1 & r1
 	DISPATCH_NEXT
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	str	r1, [locals, r3, lsl #2]		@ word at locals + (r3 << 2) = r1; r3 was negated by the rsb above
 	DISPATCH_FINISH
 }
 
 (ior_u4store) {
 	ldrb	r3, [jpc, #2]
 	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	orr	tmp2, tmp1, tmp2
+	POP	r1, tmp1		@ r1 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1		@ r1 = tmp1 | r1
 	DISPATCH_NEXT
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	str	r1, [locals, r3, lsl #2]		@ word at locals + (r3 << 2) = r1; r3 was negated by the rsb above
 	DISPATCH_FINISH
 }
 
 (ixor_u4store) {
 	ldrb	r3, [jpc, #2]
 	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	eor	tmp2, tmp1, tmp2
+	POP	r1, tmp1		@ r1 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1		@ r1 = tmp1 ^ r1
 	DISPATCH_NEXT
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	str	r1, [locals, r3, lsl #2]		@ word at locals + (r3 << 2) = r1; r3 was negated by the rsb above
 	DISPATCH_FINISH
 }
 
@@ -2279,12 +2238,12 @@
 	DISPATCH_START	\seq_len
 	DISPATCH_NEXT
 	rsb	r3, lr, #opc_istore_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -2293,12 +2252,12 @@
 	DISPATCH_START	\seq_len
 	DISPATCH_NEXT
 	rsb	r3, lr, #opc_istore_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -2307,12 +2266,12 @@
 	DISPATCH_START	\seq_len
 	DISPATCH_NEXT
 	rsb	r3, lr, #opc_istore_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	and	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	and	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -2321,12 +2280,12 @@
 	DISPATCH_START	\seq_len
 	DISPATCH_NEXT
 	rsb	r3, lr, #opc_istore_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	orr	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -2335,12 +2294,12 @@
 	DISPATCH_START	\seq_len
 	DISPATCH_NEXT
 	rsb	r3, lr, #opc_istore_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	eor	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, r3, lsl #2]
+	POP	r1, tmp1
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, r3, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -2368,10 +2327,10 @@
         DISPATCH_NEXT
         ldr     tmp1, [locals, r3, lsl #2]
         DISPATCH_NEXT
-        sub     tmp2, r2, #opc_iconst_0
+        sub     r1, r2, #opc_iconst_0
         DISPATCH_NEXT
         DISPATCH_NEXT
-        PUSH   tmp2, tmp1
+        PUSH   r1, tmp1
         DISPATCH_FINISH
 }
 
@@ -2381,32 +2340,32 @@
 	ldrb	r2, [jpc, #3]
 	rsb	tmp1, r0, #opc_iaccess_0
 	ldrb	r3, [jpc, #2]
-	add	tmp2, constpool, r2, lsl #12
+	add	r1, constpool, r2, lsl #12
 	DISPATCH_START	4
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	add	tmp2, r3, lsl #4
+	add	r1, r3, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
-	GO_IF_VOLATILE r3, tmp2, 3f
-	DISPATCH_NEXT
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE r3, r1, 3f
+	DISPATCH_NEXT
+        ldr     r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry87:
-	ldr	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
 	DISPATCH_NEXT
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+        ldr     r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry87_v:
-	ldr	tmp2, [tmp1, tmp2]
+	ldr	r1, [tmp1, r1]
 	FullBarrier
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -2691,21 +2650,21 @@
 	rsb	tmp1, r1, #opc_iaccess_0
 	ldrb	r3, [jpc, #3]
 	ldr	lr, [locals, lr, lsl #2]
-	add	tmp2, constpool, r2, lsl #12
+	add	r1, constpool, r2, lsl #12
 	DISPATCH_START	\seq_len
 	PUSH	lr
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	add	tmp2, r3, lsl #4
+	add	r1, r3, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
 	DISPATCH_NEXT
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+        ldr     r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry111:
-	ldr	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -2717,21 +2676,21 @@
 	rsb	tmp1, r1, #opc_iaccess_0
 	ldrb	r3, [jpc, #4]
 	ldr	lr, [locals, lr, lsl #2]
-	add	tmp2, constpool, r2, lsl #12
+	add	r1, constpool, r2, lsl #12
 	DISPATCH_START	\seq_len
 	PUSH	lr
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	add	tmp2, r3, lsl #4
+	add	r1, r3, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
 	DISPATCH_NEXT
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+        ldr     r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry112:
-	ldr	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -2742,14 +2701,14 @@
 {
 	rsb	tmp1, r0, #opc_aload_0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r1, #opc_aload_0
+	rsb	r1, r1, #opc_aload_0
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
 	DISPATCH_FINISH
 }
 
@@ -2758,14 +2717,14 @@
 {
 	rsb	tmp1, r0, #opc_aload_0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r1, #opc_iload_0
+	rsb	r1, r1, #opc_iload_0
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1
 	DISPATCH_FINISH
 }
 
@@ -2774,17 +2733,17 @@
 {
 	rsb	tmp1, r0, #opc_iload_0
         DISPATCH_START  \seq_len
-	rsb	tmp2, r1, #opc_aload_0
+	rsb	r1, r1, #opc_aload_0
         DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	cmp	r0, #opc_igetfield
-	ldr	tmp2, [locals, tmp2, lsl #2]
+	ldr	r1, [locals, r1, lsl #2]
         beq     1f
 2:
         DISPATCH_NEXT
         DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	PUSH	r1, tmp1
         DISPATCH_FINISH
 1:
 	ldrb	lr, [jpc, #-1]
@@ -2849,10 +2808,10 @@
 (aload_0,aload_1,aload_2,aload_3)
 {
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r1, #opc_aload_0
+	rsb	r1, r1, #opc_aload_0
 	DISPATCH_NEXT
 	rsb	tmp1, r2, #0
-	ldr	tmp2, [locals, tmp2, lsl #2]
+	ldr	r1, [locals, r1, lsl #2]
 	cmp	r0, #opc_igetfield
 	DISPATCH_NEXT
 	beq	1f
@@ -2860,7 +2819,7 @@
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	PUSH	r1, tmp1
 	DISPATCH_FINISH
 1:
 	ldrb	lr, [jpc, #-1]
@@ -2874,15 +2833,15 @@
 (iload_0,iload_1,iload_2,iload_3)
 {
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r1, #opc_iload_0
+	rsb	r1, r1, #opc_iload_0
 	DISPATCH_NEXT
 	rsb	tmp1, r2, #0
-	ldr	tmp2, [locals, tmp2, lsl #2]
+	ldr	r1, [locals, r1, lsl #2]
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	PUSH	r1, tmp1
 	DISPATCH_FINISH
 }
 
@@ -2897,16 +2856,16 @@
 
 @ r2 = [jpc, #1]
 (aload,fload)(iload,aload,fload) {
-	ldrb	tmp2, [jpc, #3]
+	ldrb	r1, [jpc, #3]		@ r1 = byte at jpc+3 (held in tmp2 before this change)
 	rsb	tmp1, r2, #0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, tmp2, #0
+	rsb	r1, r1, #0		@ r1 = -r1, same negation as applied to r2 for tmp1
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1		@ same operand order as the old PUSH tmp2, tmp1
 	DISPATCH_FINISH
 }
 
@@ -2917,16 +2876,16 @@
 }
 
 (iload)(aload,fload) {
-	ldrb	tmp2, [jpc, #3]
+	ldrb	r1, [jpc, #3]		@ r1 = byte at jpc+3 (held in tmp2 before this change)
 	rsb	tmp1, r2, #0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, tmp2, #0
+	rsb	r1, r1, #0		@ r1 = -r1, same negation as applied to r2 for tmp1
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, tmp1		@ same operand order as the old PUSH tmp2, tmp1
 	DISPATCH_FINISH
 }
 
@@ -2937,13 +2896,13 @@
 {
 	rsb	tmp1, r0, #opc_aload_0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r1, #opc_astore_0
+	rsb	r1, r1, #opc_astore_0
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp1, [locals, tmp2, lsl #2]
+	str	tmp1, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -2952,45 +2911,45 @@
 {
 	rsb	tmp1, r0, #opc_iload_0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r1, #opc_istore_0
+	rsb	r1, r1, #opc_istore_0
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp1, [locals, tmp2, lsl #2]
+	str	tmp1, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
 (aload_0,aload_1,aload_2,aload_3)
 (astore,istore,fstore)
 {
-	ldrb	tmp2, [jpc, #2]
+	ldrb	r1, [jpc, #2]		@ r1 = byte at jpc+2 (held in tmp2 before this change)
 	rsb	tmp1, r0, #opc_aload_0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, tmp2, #0
+	rsb	r1, r1, #0		@ r1 = -r1
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp1, [locals, tmp2, lsl #2]
+	str	tmp1, [locals, r1, lsl #2]		@ word at locals + (r1 << 2) = tmp1
 	DISPATCH_FINISH
 }
 
 (iload_0,iload_1,iload_2,iload_3)
 (astore,istore,fstore)
 {
-	ldrb	tmp2, [jpc, #2]
+	ldrb	r1, [jpc, #2]		@ r1 = byte at jpc+2 (held in tmp2 before this change)
 	rsb	tmp1, r0, #opc_iload_0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, tmp2, #0
+	rsb	r1, r1, #0		@ r1 = -r1
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	tmp1, [locals, tmp2, lsl #2]
+	str	tmp1, [locals, r1, lsl #2]		@ word at locals + (r1 << 2) = tmp1
 	DISPATCH_FINISH
 }
 
@@ -3001,12 +2960,12 @@
 	DISPATCH_START	\seq_len
 	rsb	tmp1, r1, #opc_astore_0
 	DISPATCH_NEXT
-	rsb	tmp2, r2, #0
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, tmp1, lsl #2]
+	rsb	r1, r2, #0
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, tmp1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -3017,27 +2976,27 @@
 	DISPATCH_START	\seq_len
 	rsb	tmp1, r1, #opc_istore_0
 	DISPATCH_NEXT
-	rsb	tmp2, r2, #0
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, tmp1, lsl #2]
+	rsb	r1, r2, #0
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, tmp1, lsl #2]
 	DISPATCH_FINISH
 }
 
 @ r2 = [jpc, #1]
 (iload,aload,fload)(astore,istore,fstore) {
 	ldrb	tmp1, [jpc, #3]
-	rsb	tmp2, r2, #0
+	rsb	r1, r2, #0		@ r1 = -r2 (held in tmp2 before this change)
 	DISPATCH_START	\seq_len
 	rsb	tmp1, tmp1, #0
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	str	tmp2, [locals, tmp1, lsl #2]
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	str	r1, [locals, tmp1, lsl #2]		@ word at locals + (tmp1 << 2) = r1
 	DISPATCH_FINISH
 }
 
@@ -3048,14 +3007,14 @@
 {
 	rsb	tmp1, r0, #opc_aload_0
 	DISPATCH_START	\seq_len
-	sub	tmp2, r1, #opc_iconst_0
+	sub	r1, r1, #opc_iconst_0
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
 	PUSH	tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -3073,14 +3032,14 @@
 {
         cmp     r0, #opc_iload
         DISPATCH_START  \seq_len
-        sub     r3, r1, #opc_iconst_0
+        sub     r1, r1, #opc_iconst_0
         DISPATCH_NEXT
-        ldr     tmp2, [locals, -r2, lsl #2]
+        ldr     r3, [locals, -r2, lsl #2]
         DISPATCH_NEXT
         beq     1f
         DISPATCH_NEXT
         DISPATCH_NEXT
-        PUSH   r3, tmp2
+        PUSH   r1, r3
         DISPATCH_FINISH
 1:
         mov     tmp1, #opc_iload_iconst_N
@@ -3445,12 +3404,12 @@
 (iload_0,iload_1,iload_2,iload_3)
 (iadd)
 {
-	POP	tmp2
+	POP	r1
 	rsb	tmp1, r0, #opc_iload_0
 	DISPATCH_START	\seq_len
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
-	add	tmp1, tmp2, tmp1
+	add	tmp1, r1, tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -3460,27 +3419,27 @@
 
 (iload,aload,fload)(iadd) {
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
+	rsb	r1, r2, #0		@ r1 = -r2 (held in tmp2 before this change)
 	POP	tmp1
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	add	r1, tmp1, r1		@ r1 = tmp1 + r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1		@ result goes back via PUSH (macro defined outside this hunk)
 	DISPATCH_FINISH
 }
 
 (iload_0,iload_1,iload_2,iload_3)
 (isub)
 {
-	POP	tmp2
+	POP	r1
 	rsb	tmp1, r0, #opc_iload_0
 	DISPATCH_START	\seq_len
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
-	sub	tmp1, tmp2, tmp1
+	sub	tmp1, r1, tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -3490,15 +3449,15 @@
 
 (iload,aload,fload)(isub) {
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
+	rsb	r1, r2, #0		@ r1 = -r2 (held in tmp2 before this change)
 	POP	tmp1
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1		@ r1 = tmp1 - r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1		@ result goes back via PUSH (macro defined outside this hunk)
 	DISPATCH_FINISH
 }
 
@@ -3627,12 +3586,12 @@
 (iload_0,iload_1,iload_2,iload_3)
 (iand)
 {
-	POP	tmp2
+	POP	r1
 	rsb	tmp1, r0, #opc_iload_0
 	DISPATCH_START	\seq_len
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
-	and	tmp1, tmp2, tmp1
+	and	tmp1, r1, tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -3642,27 +3601,27 @@
 
 (iload,aload,fload)(iand) {
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
+	rsb	r1, r2, #0		@ r1 = -r2 (held in tmp2 before this change)
 	POP	tmp1
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	and	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	and	r1, tmp1, r1		@ r1 = tmp1 & r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1		@ result goes back via PUSH (macro defined outside this hunk)
 	DISPATCH_FINISH
 }
 
 (iload_0,iload_1,iload_2,iload_3)
 (ior)
 {
-	POP	tmp2
+	POP	r1
 	rsb	tmp1, r0, #opc_iload_0
 	DISPATCH_START	\seq_len
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
-	orr	tmp1, tmp2, tmp1
+	orr	tmp1, r1, tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -3672,27 +3631,27 @@
 
 (iload,aload,fload)(ior) {
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
+	rsb	r1, r2, #0		@ r1 = -r2 (held in tmp2 before this change)
 	POP	tmp1
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	orr	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	orr	r1, tmp1, r1		@ r1 = tmp1 | r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1		@ result goes back via PUSH (macro defined outside this hunk)
 	DISPATCH_FINISH
 }
 
 (iload_0,iload_1,iload_2,iload_3)
 (ixor)
 {
-	POP	tmp2
+	POP	r1
 	rsb	tmp1, r0, #opc_iload_0
 	DISPATCH_START	\seq_len
 	ldr	tmp1, [locals, tmp1, lsl #2]
 	DISPATCH_NEXT
-	eor	tmp1, tmp2, tmp1
+	eor	tmp1, r1, tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -3702,15 +3661,15 @@
 
 (iload,aload,fload)(ixor) {
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
+	rsb	r1, r2, #0		@ r1 = -r2 (held in tmp2 before this change)
 	POP	tmp1
 	DISPATCH_NEXT
-	ldr	tmp2, [locals, tmp2, lsl #2]
-	DISPATCH_NEXT
-	eor	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	eor	r1, tmp1, r1		@ r1 = tmp1 ^ r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1		@ result goes back via PUSH (macro defined outside this hunk)
 	DISPATCH_FINISH
 }
 
@@ -4023,23 +3982,22 @@
 (ireturn,areturn,freturn)
 {
 	rsb	r0, r0, #opc_iload_0
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
 	ldr	r1, [locals, r0, lsl #2]
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
-	cmp	tmp1, tmp2
+	cmp	tmp1, r9
 	bcc	1f
 2:
 	mov	r3, #0
-	ldr	stack, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [stack, #0]
 	ldrh	r0, [r0, #40]
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	r1, [stack, r0, lsl #2]!
 
-	str	stack, [tmp_xxx, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
@@ -4053,23 +4011,22 @@
 
 (iload,aload,fload)(ireturn,areturn,freturn) {
 
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
 	ldr	r1, [locals, -r2, lsl #2]
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
-	cmp	tmp1, tmp2
+	cmp	tmp1, r9
 	bcc	1f
 2:
 	mov	r3, #0
-	ldr	stack, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [stack, #0]
 	ldrh	r0, [r0, #40]
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	r1, [stack, r0, lsl #2]!
 
-	str	stack, [tmp_xxx, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
@@ -4262,31 +4219,31 @@
 	ldrb	ip, [jpc, #4]
 	rsb	tmp1, r2, #0
 	ldrb	r3, [jpc, #3]
-	add	tmp2, constpool, ip, lsl #12
+	add	r1, constpool, ip, lsl #12
 	DISPATCH_START	5
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	add	tmp2, r3, lsl #4
+	add	r1, r3, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_3
 	DISPATCH_NEXT
-	GO_IF_VOLATILE r3, tmp2, 3f
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+	GO_IF_VOLATILE r3, r1, 3f
+        ldr     r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry88:
-	ldr	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	PUSH	tmp2
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 3:
 	VOLATILE_VERSION
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+        ldr     r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry88_v:
-	ldr	tmp2, [tmp1, tmp2]
+	ldr	r1, [tmp1, r1]
 	FullBarrier
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -4315,9 +4272,9 @@
 (iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(iadd) {
 	sub	tmp1, r0, #opc_iconst_0
 	DISPATCH_START	\seq_len
-	POP	tmp2
-	DISPATCH_NEXT
-	add	tmp1, tmp2, tmp1
+	POP	r1
+	DISPATCH_NEXT
+	add	tmp1, r1, tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -4328,9 +4285,9 @@
 (iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(isub) {
 	sub	tmp1, r0, #opc_iconst_0
 	DISPATCH_START	\seq_len
-	POP	tmp2
-	DISPATCH_NEXT
-	sub	tmp1, tmp2, tmp1
+	POP	r1
+	DISPATCH_NEXT
+	sub	tmp1, r1, tmp1
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
@@ -4461,23 +4418,22 @@
 }
 
 (iconst_m1,iconst_0,iconst_1,iconst_2,iconst_3,iconst_4,iconst_5)(ireturn) {
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
 	sub	r1, r0, #opc_iconst_0
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
-	cmp	tmp1, tmp2
+	cmp	tmp1, r9
 	bcc	1f
 2:
 	mov	r3, #0
-	ldr	stack, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
+	ldr	stack, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [stack, #0]
 	ldrh	r0, [r0, #40]
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	r1, [stack, r0, lsl #2]!
 
-	str	stack, [tmp_xxx, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
@@ -4848,17 +4804,17 @@
 @ ---- iadd; xxx ------------------------------------------------------------
 
 (iadd)(iload,fload,aload) {
-	ldrb	r3, [jpc, #2]
-	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	rsb	r3, r3, #0
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	ldrb	r1, [jpc, #2]		@ r1 = byte at jpc+2 (held in r3 before this change)
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1		@ r3 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	rsb	r1, r1, #0		@ r1 = -r1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	add	r3, tmp1, r3		@ r3 = tmp1 + r3
+	DISPATCH_NEXT
+	PUSH	r1, r3		@ same operand order as the old PUSH r3, tmp2 under the new register names
 	DISPATCH_FINISH
 }
 
@@ -4866,15 +4822,15 @@
 (iload_0,iload_1,iload_2,iload_3)
 {
 	DISPATCH_START	\seq_len
-	rsb	r3, r1, #opc_iload_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	add	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
 	DISPATCH_FINISH
 }
 
@@ -4931,12 +4887,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	add	tmp2, r3, r2
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	add	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -4944,12 +4900,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	add	tmp2, r3, r2
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	add	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -4970,17 +4926,17 @@
 @ ---- sub; xxx ------------------------------------------------------------
 
 (isub)(iload,fload,aload) {
-	ldrb	r3, [jpc, #2]
-	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	rsb	r3, r3, #0
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	ldrb	r1, [jpc, #2]		@ r1 = byte at jpc+2 (held in r3 before this change)
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1		@ r3 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	rsb	r1, r1, #0		@ r1 = -r1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	sub	r3, tmp1, r3		@ r3 = tmp1 - r3
+	DISPATCH_NEXT
+	PUSH	r1, r3		@ same operand order as the old PUSH r3, tmp2 under the new register names
 	DISPATCH_FINISH
 }
 
@@ -4988,15 +4944,15 @@
 (iload_0,iload_1,iload_2,iload_3)
 {
 	DISPATCH_START	\seq_len
-	rsb	r3, r1, #opc_iload_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	sub	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
 	DISPATCH_FINISH
 }
 
@@ -5053,12 +5009,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	sub	tmp2, r3, r2
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	sub	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -5066,12 +5022,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	sub	tmp2, r3, r2
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	sub	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -5092,17 +5048,17 @@
 @ ---- iand; xxx ------------------------------------------------------------
 
 (iand)(iload,fload,aload) {
-	ldrb	r3, [jpc, #2]
-	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	rsb	r3, r3, #0
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	DISPATCH_NEXT
-	and	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	ldrb	r1, [jpc, #2]		@ r1 = byte at jpc+2 (held in r3 before this change)
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1		@ r3 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	rsb	r1, r1, #0		@ r1 = -r1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	and	r3, tmp1, r3		@ r3 = tmp1 & r3
+	DISPATCH_NEXT
+	PUSH	r1, r3		@ same operand order as the old PUSH r3, tmp2 under the new register names
 	DISPATCH_FINISH
 }
 
@@ -5110,15 +5066,15 @@
 (iload_0,iload_1,iload_2,iload_3)
 {
 	DISPATCH_START	\seq_len
-	rsb	r3, r1, #opc_iload_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	and	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	and	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
 	DISPATCH_FINISH
 }
 
@@ -5175,12 +5131,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	and	tmp2, r3, r2
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	and	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -5188,12 +5144,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	and	tmp2, r3, r2
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	and	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -5214,17 +5170,17 @@
 @ ---- ior; xxx ------------------------------------------------------------
 
 (ior)(iload,fload,aload) {
-	ldrb	r3, [jpc, #2]
-	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	rsb	r3, r3, #0
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	DISPATCH_NEXT
-	orr	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	ldrb	r1, [jpc, #2]		@ r1 = byte at jpc+2 (held in r3 before this change)
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1		@ r3 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	rsb	r1, r1, #0		@ r1 = -r1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	orr	r3, tmp1, r3		@ r3 = tmp1 | r3
+	DISPATCH_NEXT
+	PUSH	r1, r3		@ same operand order as the old PUSH r3, tmp2 under the new register names
 	DISPATCH_FINISH
 }
 
@@ -5232,15 +5188,15 @@
 (iload_0,iload_1,iload_2,iload_3)
 {
 	DISPATCH_START	\seq_len
-	rsb	r3, r1, #opc_iload_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	orr	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	orr	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
 	DISPATCH_FINISH
 }
 
@@ -5297,12 +5253,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	orr	tmp2, r3, r2
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	orr	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -5310,12 +5266,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	orr	tmp2, r3, r2
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	orr	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -5337,17 +5293,17 @@
 @ ---- ixor; xxx ------------------------------------------------------------
 
 (ixor)(iload,fload,aload) {
-	ldrb	r3, [jpc, #2]
-	DISPATCH_START	\seq_len
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	rsb	r3, r3, #0
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	DISPATCH_NEXT
-	eor	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	ldrb	r1, [jpc, #2]		@ r1 = byte at jpc+2 (held in r3 before this change)
+	DISPATCH_START	\seq_len
+	POP	r3, tmp1		@ r3 stands in for tmp2, which is no longer available
+	DISPATCH_NEXT
+	rsb	r1, r1, #0		@ r1 = -r1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]		@ r1 = word at locals + (r1 << 2)
+	DISPATCH_NEXT
+	eor	r3, tmp1, r3		@ r3 = tmp1 ^ r3
+	DISPATCH_NEXT
+	PUSH	r1, r3		@ same operand order as the old PUSH r3, tmp2 under the new register names
 	DISPATCH_FINISH
 }
 
@@ -5355,15 +5311,15 @@
 (iload_0,iload_1,iload_2,iload_3)
 {
 	DISPATCH_START	\seq_len
-	rsb	r3, r1, #opc_iload_0
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
-	ldr	r3, [locals, r3, lsl #2]
-	eor	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	rsb	r1, r1, #opc_iload_0
+	POP	r3, tmp1
+	DISPATCH_NEXT
+	ldr	r1, [locals, r1, lsl #2]
+	eor	r3, tmp1, r3
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1, r3
 	DISPATCH_FINISH
 }
 
@@ -5420,12 +5376,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	eor	tmp2, r3, r2
-	DISPATCH_NEXT
-	add	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	eor	r1, r3, r2
+	DISPATCH_NEXT
+	add	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
         DISPATCH_FINISH
 }
 
@@ -5433,12 +5389,12 @@
 	DISPATCH_START	\seq_len
 	POP	r2, r3, tmp1
 	DISPATCH_NEXT
-	eor	tmp2, r3, r2
-	DISPATCH_NEXT
-	sub	tmp2, tmp1, tmp2
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUSH	tmp2
+	eor	r1, r3, r2
+	DISPATCH_NEXT
+	sub	r1, tmp1, r1
+	DISPATCH_NEXT
+	DISPATCH_NEXT
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -5463,15 +5419,15 @@
         ldrsb   tmp1, [jpc, #2]
 	sub	lr, r1, #opc_iconst_0
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
-	ldr	r3, [locals, tmp2, lsl #2]
+	rsb	r1, r2, #0
+	ldr	r3, [locals, r1, lsl #2]
 	DISPATCH_NEXT
 	add	r3, r3, tmp1
 	DISPATCH_NEXT
 	PUSH	lr
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	r3, [locals, tmp2, lsl #2]
+	str	r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -5479,14 +5435,14 @@
 	ldrb	lr, [jpc, #4]
         ldrsb   tmp1, [jpc, #2]
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
-	ldr	r3, [locals, tmp2, lsl #2]
+	rsb	r1, r2, #0
+	ldr	r3, [locals, r1, lsl #2]
 	DISPATCH_NEXT
 	rsb	lr, lr, #0
 	DISPATCH_NEXT
 	add	r3, r3, tmp1
 	DISPATCH_NEXT
-	str	r3, [locals, tmp2, lsl #2]
+	str	r3, [locals, r1, lsl #2]
 	ldr	tmp1, [locals, lr, lsl #2]
 	DISPATCH_NEXT
 	PUSH	tmp1
@@ -5499,12 +5455,12 @@
 	rsb	lr, r1, #opc_iload_0
         ldrsb   tmp1, [jpc, #2]
 	DISPATCH_START	\seq_len
-	rsb	tmp2, r2, #0
-	ldr	r3, [locals, tmp2, lsl #2]
+	rsb	r1, r2, #0
+	ldr	r3, [locals, r1, lsl #2]
 	DISPATCH_NEXT
 	add	r3, r3, tmp1
 	DISPATCH_NEXT
-	str	r3, [locals, tmp2, lsl #2]
+	str	r3, [locals, r1, lsl #2]
 	DISPATCH_NEXT
 	ldr	tmp1, [locals, lr, lsl #2]
 	DISPATCH_NEXT
@@ -5520,24 +5476,24 @@
 (iload,fload,aload) {
 	ldrb	r2, [jpc, #3]
 	rsb	tmp1, r0, #opc_iaccess_0
-	ldrb	r3, [jpc, #2]
-	add	tmp2, constpool, r2, lsl #12
+	ldrb	r1, [jpc, #2]
+	add	r3, constpool, r2, lsl #12
 	DISPATCH_START	6
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	add	tmp2, r3, lsl #4
-	ldrb	r3, [jpc, #-1]
+	add	r3, r1, lsl #4
+	ldrb	r1, [jpc, #-1]
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_5
 	DISPATCH_NEXT
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
-	DISPATCH_NEXT
-	rsb	r3, r3, #0
+        ldr     r3, [r3, #CP_OFFSET+8]
+	DISPATCH_NEXT
+	rsb	r1, r1, #0
 .abortentry89:
-	ldr	tmp2, [tmp1, tmp2]
-	ldr	r3, [locals, r3, lsl #2]
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	ldr	r3, [tmp1, r3]
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r1, r3
 	DISPATCH_FINISH
 }
 
@@ -5546,22 +5502,22 @@
 	ldrb	r2, [jpc, #3]
 	rsb	tmp1, r0, #opc_iaccess_0
 	ldrb	ip, [jpc, #2]
-	add	tmp2, constpool, r2, lsl #12
+	add	r3, constpool, r2, lsl #12
 	DISPATCH_START	5
-	rsb	r3, r1, #opc_iload_0
+	rsb	r1, r1, #opc_iload_0
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	add	tmp2, ip, lsl #4
+	add	r3, ip, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_4
 	DISPATCH_NEXT
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+        ldr     r3, [r3, #CP_OFFSET+8]
 	DISPATCH_NEXT
 .abortentry90:
-	ldr	tmp2, [tmp1, tmp2]
-	ldr	r3, [locals, r3, lsl #2]
-	DISPATCH_NEXT
-	PUSH	r3, tmp2
+	ldr	r3, [tmp1, r3]
+	ldr	r1, [locals, r1, lsl #2]
+	DISPATCH_NEXT
+	PUSH	r1, r3
 	DISPATCH_FINISH
 }
 
@@ -5570,22 +5526,22 @@
 	ldrb	r2, [jpc, #3]
 	rsb	tmp1, r0, #opc_iaccess_0
 	ldrb	ip, [jpc, #2]
-	add	tmp2, constpool, r2, lsl #12
+	add	r1, constpool, r2, lsl #12
 	DISPATCH_START	5
 	ldr	tmp1, [locals, tmp1, lsl #2]
-	add	tmp2, ip, lsl #4
+	add	r1, ip, lsl #4
 	DISPATCH_NEXT
 	SW_NPC	cmp	tmp1, #0
 	SW_NPC	beq	null_ptr_exception_jpc_4
 	DISPATCH_NEXT
-        ldr     tmp2, [tmp2, #CP_OFFSET+8]
+        ldr     r1, [r1, #CP_OFFSET+8]
 	DISPATCH_NEXT
 	POP	r3
 .abortentry91:
-	ldr	tmp2, [tmp1, tmp2]
-	DISPATCH_NEXT
-	add	tmp2, tmp2, r3
-	PUSH	tmp2
+	ldr	r1, [tmp1, r1]
+	DISPATCH_NEXT
+	add	r1, r1, r3
+	PUSH	r1
 	DISPATCH_FINISH
 }
 
@@ -6249,13 +6205,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #0
+        rsb     r1, lr, #0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         add     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6286,13 +6242,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #opc_istore_0
+        rsb     r1, lr, #opc_istore_0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         add     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6323,13 +6279,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #0
+        rsb     r1, lr, #0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         sub     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6360,13 +6316,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #opc_istore_0
+        rsb     r1, lr, #opc_istore_0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         sub     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6397,13 +6353,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #0
+        rsb     r1, lr, #0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         and     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6434,13 +6390,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #opc_istore_0
+        rsb     r1, lr, #opc_istore_0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         and     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6471,13 +6427,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #0
+        rsb     r1, lr, #0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         orr     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6508,13 +6464,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #opc_istore_0
+        rsb     r1, lr, #opc_istore_0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         orr     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6545,13 +6501,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #0
+        rsb     r1, lr, #0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         eor     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6582,13 +6538,13 @@
         DISPATCH_START  \seq_len
 	sub	r3, r3, #opc_iconst_0
 	DISPATCH_NEXT
-        rsb     tmp2, lr, #opc_istore_0
+        rsb     r1, lr, #opc_istore_0
 	DISPATCH_NEXT
         ldr     tmp1, [locals, r2, lsl #2]
 	DISPATCH_NEXT
         eor     r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6773,14 +6729,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	add	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6793,14 +6749,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	add	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6852,14 +6808,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	sub	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6872,14 +6828,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	sub	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6931,14 +6887,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	and	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -6951,14 +6907,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	and	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7010,14 +6966,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	orr	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7030,14 +6986,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	orr	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7089,14 +7045,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	eor	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7109,14 +7065,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #opc_istore_0
+	rsb	r1, lr, #opc_istore_0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	eor	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7168,14 +7124,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	add	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7188,14 +7144,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	add	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7247,14 +7203,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	sub	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7267,14 +7223,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	sub	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7326,14 +7282,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	and	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7346,14 +7302,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	and	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7405,14 +7361,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	orr	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7425,14 +7381,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	orr	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7484,14 +7440,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	eor	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
@@ -7504,14 +7460,14 @@
         DISPATCH_START  \seq_len
 	rsb	r3, r3, #opc_iload_0
 	DISPATCH_NEXT
-	rsb	tmp2, lr, #0
+	rsb	r1, lr, #0
 	ldr	tmp1, [locals, r2, lsl #2]
 	ldr	r3, [locals, r3, lsl #2]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	eor	r3, tmp1, r3
 	DISPATCH_NEXT
-        str     r3, [locals, tmp2, lsl #2]
+        str     r3, [locals, r1, lsl #2]
 	DISPATCH_FINISH
 }
 
--- a/src/cpu/zero/vm/cppInterpreter_arm.S	Wed May 16 11:21:07 2012 +0100
+++ b/src/cpu/zero/vm/cppInterpreter_arm.S	Thu May 17 13:45:50 2012 -0400
@@ -18,9 +18,9 @@
 @ 2 along with this work; if not, write to the Free Software Foundation,
 @ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 
-#undef THUMB2EE
+#undef T2JIT
 #if !defined(DISABLE_THUMB2) && defined(HOTSPOT_ASM) && !defined(SHARK)
-#define THUMB2EE
+#define T2JIT
 #endif
 
 #ifdef HOTSPOT_ASM
@@ -29,7 +29,7 @@
 #define ARMv4
 #endif
 
-#if defined(SHARK) || defined(THUMB2EE)
+#if defined(SHARK) || defined(T2JIT)
 
 #define USE_COMPILER
 
@@ -41,8 +41,8 @@
 #define MP_COMPILE_THRESHOLD    0x10000         // 65536 - must be a single MOV constant
 #define UP_COMPILE_THRESHOLD    0x30000         // 196608 - must be a single MOV constant
 #else
-#define MP_COMPILE_THRESHOLD    0x2700		// 10000 - must be a single MOV constant
-#define UP_COMPILE_THRESHOLD    0x2700		// 10000 - must be a single MOV constant
+#define MP_COMPILE_THRESHOLD    0x1380		// ~ 5000 - must be a single MOV constant
+#define UP_COMPILE_THRESHOLD    0x1380		// ~ 5000 - must be a single MOV constant
 #endif
 
 #define MAX_FG_METHOD_SIZE      500
@@ -54,7 +54,7 @@
 #define DISABLE_BG_COMP_ON_NON_MP
 #endif
 
-#ifdef THUMB2EE
+#ifdef T2JIT
 #define FREQ_COUNT_OVERFLOW Thumb2_Compile
 #else
 #define FREQ_COUNT_OVERFLOW _ZN18InterpreterRuntime26frequency_counter_overflowEP10JavaThreadPh
@@ -78,12 +78,15 @@
 #define LEAF_STACK_SIZE	200
 #define STACK_SPARE	40
 
+#define TBIT 1
+	
 #define stack		r4
 #define	jpc		r5
 #define dispatch	r6
 #define locals		r7
 #define istate		r8
 #define constpool	r9
+#define thread		r10
 #define arm_sp		r13
 
 #define tmp_xxx		r7
@@ -91,17 +94,23 @@
 #define tmp_vvv		r9
 
 #define tmp1		r11
-#define tmp2		r10
 
 #define regset		r4,r5,r6,r7,r9,r10,r11
 #define fast_regset	r8
 
+#define Rframe	r7
+
+#define FRAME_METHOD		(ISTATE_METHOD-ISTATE_NEXT_FRAME)
+#define FRAME_CONSTANTS		(ISTATE_CONSTANTS-ISTATE_NEXT_FRAME)
+#define FRAME_BCP		(ISTATE_BCP-ISTATE_NEXT_FRAME)
+#define FRAME_STACK_LIMIT	(ISTATE_STACK_LIMIT-ISTATE_NEXT_FRAME)
+#define FRAME_LOCALS		(ISTATE_LOCALS-ISTATE_NEXT_FRAME)
+#define FRAME_STACK		(ISTATE_STACK-ISTATE_NEXT_FRAME)
+
 #include "offsets_arm.s"
 
 #define last_implemented_bytecode 201
 
-#define CODE_ALIGN_SIZE 64
-
 	.macro	ALIGN_CODE
 	.align	6
 	.endm
@@ -118,6 +127,18 @@
 	.align	2
 	.endm
 
+#define SLOW_ENTRY_OFFSET 24	// bytes from the 64-byte aligned start to the slow entry code (the six header words below)
+#define FAST_ENTRY_OFFSET 40	// NOTE(review): presumably slow entry to fast entry (24 + 40 = 64) - confirm against thumb2.cpp
+
+	.macro	SLOW_ENTRY
+	ALIGN_CODE	@ .align 6, i.e. start on a 64-byte boundary
+	.word	0, 0, 0, 0, 0, 0	@ six zero words = 24 bytes = SLOW_ENTRY_OFFSET; the slow entry label follows
+	.endm
+
+	.macro	FAST_ENTRY
+	ALIGN_CODE	@ pads to the next 64-byte boundary, so the preceding slow entry code must fit in the gap
+	.endm
+
 @------------------------------------------------
 @ Software NULL Pointer check macro.
 @ Usage:
@@ -259,7 +280,7 @@
 @	T2	<thumb2 specific code>
 @------------------------------------------------
 	.macro	T2	p1, p2, p3, p4
-#ifdef THUMB2EE
+#ifdef T2JIT
   .ifnes "\p4", ""
         \p1 \p2, \p3, \p4
   .else
@@ -389,6 +410,11 @@
 #endif
 	.endm
 	
+	.macro	LOAD_ISTATE
+	ldr	istate, [thread, #THREAD_TOP_ZERO_FRAME]	@ start from the top zero frame recorded in the thread
+	sub	istate, istate, #ISTATE_NEXT_FRAME	@ step back by the NEXT_FRAME offset; presumably yields the istate base - confirm
+	.endm
+
 	.macro	CACHE_JPC
 	ldr	jpc, [istate, #ISTATE_BCP]
 	.endm
@@ -405,10 +431,18 @@
 	ldr	constpool, [istate, #ISTATE_CONSTANTS]
 	.endm
 
+	.macro	DECACHE_STACK_USING_FRAME
+	str	stack, [Rframe, #FRAME_STACK]
+	.endm
+
 	.macro	DECACHE_STACK
 	str	stack, [istate, #ISTATE_STACK]
 	.endm
 
+	.macro	DECACHE_JPC_USING_FRAME
+	str	jpc, [Rframe, #FRAME_BCP]
+	.endm
+
 	.macro	DECACHE_JPC
 	str	jpc, [istate, #ISTATE_BCP]
 	.endm
@@ -487,14 +521,16 @@
 	@ This macro calls a user-supplied my_trace routine.  It
-	@ passes the current JPC as argument zero.  It can be safely
-	@ inserted at any point in the interpreter.
+	@ passes the current JPC as argument zero and istate as argument
+	@ one.  It can be safely inserted at any point in the interpreter.
 	.macro TRACE
-	stmfd	sp!, {r0, r1, r2, r3, r4, lr}
+	stmfd	sp!, {r0, r1, r2, r3, r4, ip, lr}	@ ip saved too; NOTE(review): 7 words change sp 8-byte alignment at the call - confirm
 	mrs	r4, cpsr
 	mov	r0, jpc
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
+	sub	r1, r1, #ISTATE_NEXT_FRAME	@ r1 = top zero frame minus ISTATE_NEXT_FRAME, passed as istate
 	ldr	r2, =my_trace
 	blx	r2
 	msr	cpsr, r4
-	ldmfd	sp!, {r0, r1, r2, r3, r4, lr}	
+	ldmfd	sp!, {r0, r1, r2, r3, r4, ip, lr}
 	.endm
 	
 	.macro	DISPATCH_FINISH
@@ -813,39 +849,32 @@
         .word   accessor_entry                  // implementation of java.lang.ref.Reference.get()
 3:
 	
-	ALIGN_CODE
+	SLOW_ENTRY
 native_entry_synchronized:
-	b	fast_native_entry_synchronized
-
-	ALIGN_CODE
+	@ Slow entry: r2 already holds the thread pointer (see native_entry); r10 is not valid yet, so leave r2 alone.
+	b	_ZN14CppInterpreter12native_entryEP13methodOopDesciP6Thread
+
+	FAST_ENTRY
 fast_native_entry_synchronized:
+	mov	r2, thread	@ fast entry: the thread pointer is in r10; hand it over as the Thread* argument
 	b	_ZN14CppInterpreter12native_entryEP13methodOopDesciP6Thread
 
-	ALIGN_CODE
+	SLOW_ENTRY
 empty_entry:
-	b	fast_empty_entry
-
-	ALIGN_CODE
-fast_empty_entry:
-	ldr	r3, .L1359
-	ldr	r1, .L1359+4
-.LPIC19:
-	add	r3, pc, r3
-
-	ldr	ip, [r3, r1]
-	ldr	r3, [ip, #0]
-	cmp	r3, #0
-	bne	normal_entry_synchronized
-
 	ldrh	r3, [r0, #42]
 	ldr	r1, [r2, #THREAD_JAVA_SP]
 	add	r1, r1, r3, lsl #2
 	str	r1, [r2, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	bx	lr
-.L1359:
-	.word	_GLOBAL_OFFSET_TABLE_-(.LPIC19+8)
-	.word	_ZN20SafepointSynchronize6_stateE(GOT)
+
+	FAST_ENTRY
+fast_empty_entry:	@ fast path for empty methods: only adjusts the Java SP, using the thread register
+	ldrh	r3, [r0, #42]	@ halfword from the method in r0; NOTE(review): 42 is presumably METHOD_SIZEOFPARAMETERS - confirm
+	ldr	r1, [thread, #THREAD_JAVA_SP]
+	add	r1, r1, r3, lsl #2	@ advance the Java SP by r3 words
+	str	r1, [thread, #THREAD_JAVA_SP]
+	bx	lr	@ NOTE(review): r0 is not zeroed here, unlike the slow empty_entry path - confirm fast-entry callers ignore it
 
 @ ---- START execute.s ---------------------------------------------------------------------
 
@@ -885,7 +914,7 @@
 	bx	lr
 #endif // HW_NULL_PTR_CHECK
 2:
-#ifdef THUMB2EE
+#ifdef T2JIT
 	b	Thumb2_Check_Null
 #else
 	mov	r0, #0
@@ -994,14 +1023,15 @@
 #endif
 
 
-	ALIGN_CODE
+	SLOW_ENTRY
 native_entry:
 	stmfd	arm_sp!, {regset, lr}
+	mov	thread, r2	@ slow entry: the thread pointer arrives in r2; copy it into the thread register (r10) for the fast path
 	bl	fast_native_entry	
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmia	sp!, {regset, pc}
 
-	ALIGN_CODE
+	FAST_ENTRY
 fast_native_entry:
 	adrl	ip, dispatch_init_adcon
 	mov	r11, r0
@@ -1010,12 +1040,12 @@
 	add	dispatch, dispatch, ip
 	add	dispatch, dispatch, r7
 	ldrh	r1, [r11, #METHOD_SIZEOFPARAMETERS]
-	ldr	r4, [r2, #THREAD_JAVA_SP]
-	ldr	r3, [r2, #THREAD_TOP_ZERO_FRAME]
+	ldr	r4, [thread, #THREAD_JAVA_SP]
+	ldr	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 	mov	r0, #0
 	mov	ip, #INTERPRETER_FRAME
 	sub	r9, r4, #FRAME_SIZE
-	str	r9, [r2, #THREAD_JAVA_SP]	@ drop stack
+	str	r9, [thread, #THREAD_JAVA_SP]	@ drop stack
 	sub	r5, r9, #4		@ stack limit = r9 - 4
 	str	r3, [r9, #ISTATE_NEXT_FRAME]
 	str	ip, [r9, #ISTATE_FRAME_TYPE]
@@ -1024,51 +1054,47 @@
 	str	r9, [r9, #ISTATE_STACK_BASE]
 	str	r0, [r9, #ISTATE_OOP_TEMP]
 
-@	str	r0, [r9, #ISTATE_CALLEE]
-@	str	r0, [r9, #ISTATE_PREV_LINK]
 	str	r0, [r9, #ISTATE_MSG]
-@	str	r0, [r9, #ISTATE_MDX]
-
-	ldr	r10, [r11, #METHOD_CONSTANTS]
+
+	ldr	ip, [r11, #METHOD_CONSTANTS]
 	sub	r7, r4, #4
 	mov	r5, #0
 	add	r7, r7, r1, lsl #2
 
-	ldr	r10, [r10, #CONSTANTPOOL_CACHE]
-
-	str	r2, [r9, #ISTATE_THREAD]
+	ldr	ip, [ip, #CONSTANTPOOL_CACHE]
+
+	str	thread, [r9, #ISTATE_THREAD]
 	str	r5, [r9, #ISTATE_BCP]
 	str	r7, [r9, #ISTATE_LOCALS]
-	str	r10, [r9, #ISTATE_CONSTANTS]
+	str	ip, [r9, #ISTATE_CONSTANTS]
 	str	r11, [r9, #ISTATE_METHOD]
 	str     r9, [r9, #ISTATE_SELF_LINK]
 
-@	stmia	r9, {r2, r5, r7, r10, r11}
-	ldr	r1, [r2, #THREAD_STACK_SIZE]
-	ldr	r3, [r2, #THREAD_STACK_BASE]
+	ldr	r1, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
 	add	r0, r9, #72
-	mov	r9, r2
+
 	rsb	r3, r1, r3
 	rsb	r3, r3, arm_sp
 	cmp	r3, #4096
-	str	r0, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	r0, [thread, #THREAD_TOP_ZERO_FRAME]
 	ldr	r5, [r11, #METHOD_SIGNATUREHANDLER]
 	blt	.fast_native_entry_throw_stack_overflow
 	cmp	r5, #0
 	bne	.fast_native_entry_got_handleraddr
-	str	r5, [r9, #THREAD_LAST_JAVA_SP] @ r5 is zero at this point
-	str	r0, [r9, #THREAD_LAST_JAVA_FP]
-	ldr	r0, [r9, #THREAD_JAVA_SP]
-	str	r0, [r9, #THREAD_LAST_JAVA_SP]
-	mov	r0, r9
+	str	r5, [thread, #THREAD_LAST_JAVA_SP] @ r5 is zero at this point
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r0, [thread, #THREAD_JAVA_SP]
+	str	r0, [thread, #THREAD_LAST_JAVA_SP]
+	mov	r0, thread
 	mov	r1, r11
 	bl	_ZN18InterpreterRuntime19prepare_native_callEP10JavaThreadP13methodOopDesc
-	ldr	r11, [r9, #THREAD_TOP_ZERO_FRAME]
-	ldr	r1, [r9, #THREAD_PENDING_EXC]
-	str	r5, [r9, #THREAD_LAST_JAVA_SP]  @ r5 is zero at this point
-	str	r5, [r9, #THREAD_LAST_JAVA_FP]
-	ldr	r5, [r9, #THREAD_JAVA_SP]
-	str	r5, [r9, #THREAD_LAST_JAVA_SP]
+	ldr	r11, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	str	r5, [thread, #THREAD_LAST_JAVA_SP]  @ r5 is zero at this point
+	str	r5, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r5, [thread, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r11, [r11, #-72 + ISTATE_METHOD]
 	cmp	r1, #0
 	bne	.fast_native_entry_exception
@@ -1077,25 +1103,25 @@
 	ldr	r2, [dispatch, #InterpreterRuntime_slow_signature_handler_Address-XXX]
 	cmp	r5, r2
 	bne	.fast_native_entry_get_handler
-	ldr	r3, [r9, #THREAD_TOP_ZERO_FRAME]
+	ldr	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 	stmfd	sp!, {r2}
 	mov	r2, #0
-        str     r2, [r9, #THREAD_LAST_JAVA_SP]
+        str     r2, [thread, #THREAD_LAST_JAVA_SP]
 	ldmfd	sp!, {r2}
-	mov	r0, r9
-	str	r3, [r9, #THREAD_LAST_JAVA_FP]
-	ldr	r3, [r9, #THREAD_JAVA_SP]
-	str	r3, [r9, #THREAD_LAST_JAVA_SP]
+	mov	r0, thread
+	str	r3, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r3, [thread, #THREAD_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	mov	r3, r2
 	mov	r1, r11
 	bl	_ZN18InterpreterRuntime22slow_signature_handlerEP10JavaThreadP13methodOopDescPiS4_
-	ldr	r11, [r9, #THREAD_TOP_ZERO_FRAME]
-	ldr	r1, [r9, #THREAD_PENDING_EXC]
+	ldr	r11, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
 	mov	r3, #0
 	ldr	r11, [r11, #-72 + ISTATE_METHOD]
 	cmp	r1, #0
-	str	r3, [r9, #THREAD_LAST_JAVA_SP]
-	str	r3, [r9, #THREAD_LAST_JAVA_FP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_FP]
 	mov	r5, r0
 	bne	.fast_native_entry_exception
 .fast_native_entry_get_handler:
@@ -1113,14 +1139,14 @@
 	add	r1, r5, #SIZEOF_FFI_CIF
 
 	sub	arm_sp, arm_sp, lr, lsl #2
-	add	r2, r9, #THREAD_JNI_ENVIRONMENT
+	add	r2, thread, #THREAD_JNI_ENVIRONMENT
 
 	mov	lr, arm_sp
 	str	r2, [lr], #4
 
 #ifdef __ARM_PCS_VFP
-	mov	r10, #0xff	@ bitmap for floating-point register set
-	orr	r10, #0xff00
+	mov	thread, #0xff	@ bitmap for floating-point register set
+	orr	thread, #0xff00	@ r10 is borrowed as scratch from here, so it no longer holds the thread pointer; .fast_no_args reloads it from ISTATE_THREAD
 #endif	
 	ldr	r2, [r11, #METHOD_ACCESSFLAGS]
 	add	r1, r1, #4
@@ -1185,18 +1211,18 @@
 	.endm
 	
 .fast_copy_double:
-	orrs	r10, r10
+	orrs	thread, thread
 	ldreq	r2, [r7], #-4	
 	beq	vm_fatal_error
-	FIND_LOWEST_BIT_PAIR r0, r10
+	FIND_LOWEST_BIT_PAIR r0, thread
 	adrl	r2, .copy_double_table
 	add	pc, r2, r0, asl#5
 
 .fast_copy_float:
-	orrs	r10, r10
+	orrs	thread, thread
 	ldreq	r2, [r7], #-4	
 	beq	vm_fatal_error
-	FIND_LOWEST_BIT r0, r10
+	FIND_LOWEST_BIT r0, thread
 	adr	r2, .copy_float_table
 	add	pc, r2, r0, asl#6
 #else
@@ -1238,17 +1264,18 @@
 	bge	.fast_copy_args
 
 .fast_no_args:
-	ldr	r0, [r9, #THREAD_TOP_ZERO_FRAME]
+	ldr     thread, [r9, #ISTATE_THREAD]
+	ldr	r0, [thread, #THREAD_TOP_ZERO_FRAME]
 	mov	r2, #_thread_in_native
 
 	mov	ip, #0
-	str	ip, [r9, #THREAD_LAST_JAVA_SP]
-
-	str	r0, [r9, #THREAD_LAST_JAVA_FP]
-	str	r2, [r9, #THREAD_STATE]
-
-	ldr	r2, [r9, #THREAD_JAVA_SP]
-	str	r2, [r9, #THREAD_LAST_JAVA_SP]
+	str	ip, [thread, #THREAD_LAST_JAVA_SP]
+
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_STATE]
+
+	ldr	r2, [thread, #THREAD_JAVA_SP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
 
 	ldr	ip, [r11, #METHOD_NATIVEHANDLER]
 	ldrh	r11, [r11, #METHOD_SIZEOFPARAMETERS]
@@ -1260,11 +1287,11 @@
 	mov	arm_sp, r4
 
 	ldr	r3, [dispatch, #SafePointSynchronize_state_Address-XXX]
-	str	ip, [r9, #THREAD_STATE]
+	str	ip, [thread, #THREAD_STATE]
 
 	ldr	r3, [r3, #0]
 	cmp	r3, #0
-	ldreq	r3, [r9, #THREAD_SUSPEND_FLAGS]
+	ldreq	r3, [thread, #THREAD_SUSPEND_FLAGS]
 	cmpeq	r3, #0
 	bne	.fast_native_entry_do_special
 
@@ -1272,14 +1299,14 @@
 	mov	r3, #_thread_in_Java
 	mov	r2, #0
 
-	str	r3, [r9, #THREAD_STATE]
-	str	r2, [r9, #THREAD_LAST_JAVA_SP]
-	str	r2, [r9, #THREAD_LAST_JAVA_FP]
+	str	r3, [thread, #THREAD_STATE]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	str	r2, [thread, #THREAD_LAST_JAVA_FP]
 
 	add	r2, r5, #SIZEOF_FFI_CIF
 	ldr	r3, [r5, #4]
 
-	ldr	r5, [r9, #THREAD_TOP_ZERO_FRAME]
+	ldr	r5, [thread, #THREAD_TOP_ZERO_FRAME]
 
 	ldr	lr, [r5], #4
 
@@ -1289,7 +1316,7 @@
 	adr	r3, .return_type_table
 
 	ldrh	r2, [ip, #6]
-	ldr	ip, [r9, #THREAD_ACTIVE_HANDLES]
+	ldr	ip, [thread, #THREAD_ACTIVE_HANDLES]
 
 	mov	tmp1, #0
 	ldr	pc, [r3, r2, lsl #2]
@@ -1319,64 +1346,64 @@
 #ifdef __ARM_PCS_VFP
 .fast_native_return_double:
 	fsts	s0, [r5, #-8]
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
 	fsts	s1, [r5, #-4]
 	add	r5, #-8
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 .fast_native_return_float:
 	fsts	s0, [r5, #-4]
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
 	add	r5, #-4
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 #endif
 .fast_native_return_dw:
 	str	r0, [r5, #-8]!
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
 	str	r1, [r5, #4]
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 .fast_native_return_byte:
 	mov	r0, r0, lsl #24
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	mov	r0, r0, asr #24
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
 	str	r0, [r5, #-4]!
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 .fast_native_return_char:
 	mov	r0, r0, lsl #16
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	mov	r0, r0, lsr #16
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
 	str	r0, [r5, #-4]!
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 .fast_native_return_bool:
 	ands	r0, r0, #255
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	movne	r0, #1
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
 	str	r0, [r5, #-4]!
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 .fast_native_return_obj:
 	cmp	r0, #0
 	ldrne	r0, [r0]
 	str	r0, [r5, #-4]!
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 .fast_native_return_short:
@@ -1385,25 +1412,25 @@
 .fast_native_return_w:
 	str	r0, [r5, #-4]!
 .fast_native_return_void:
-	str	lr, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	lr, [thread, #THREAD_TOP_ZERO_FRAME]
 	str	tmp1, [ip, #JNIHANDLEBLOCK_TOP]
 .fast_native_exit:
-	str	r5, [r9, #THREAD_JAVA_SP]
+	str	r5, [thread, #THREAD_JAVA_SP]
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 
 .fast_native_entry_throw_stack_overflow:
-	str	r0, [r9, #THREAD_LAST_JAVA_FP]
-	mov	r0, r9
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	mov	r0, thread
 	bl	_ZN18InterpreterRuntime24throw_StackOverflowErrorEP10JavaThread
 	mov	r3, #0
-	ldr	r1, [r9, #THREAD_PENDING_EXC]
-	str	r3, [r9, #THREAD_LAST_JAVA_FP]
-	str	r3, [r9, #THREAD_LAST_JAVA_SP]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
+	str	r3, [thread, #THREAD_LAST_JAVA_FP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 .fast_native_entry_exception:
-	ldr	r5, [r9, #THREAD_TOP_ZERO_FRAME]
+	ldr	r5, [thread, #THREAD_TOP_ZERO_FRAME]
 	ldr	r3, [r5], #4
-	str	r3, [r9, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 
 	ldrh	r3, [r11, #METHOD_SIZEOFPARAMETERS]
 	add	r5, r5, r3, lsl #2
@@ -1411,7 +1438,7 @@
 
 .fast_native_entry_do_special:
 	stmdb	arm_sp!, {r0, r1}
-	mov	r0, r9
+	mov	r0, thread
 	bl	_ZN10JavaThread40check_special_condition_for_native_transEPS_
 	ldmia	arm_sp!, {r0, r1}
 	b	.fast_native_entry_do_return
@@ -1536,69 +1563,69 @@
 
 	Opcode	goto
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
 branch_taken:
-        orr     tmp1, tmp1, r1, lsl #8
-        DISPATCH_START_REG	tmp1
-	cmp	tmp1, #0
+        orr     r2, r2, r1, lsl #8
+        DISPATCH_START_REG	r2
+	cmp	r2, #0
 	ble	do_backedge
 	DISPATCH_FINISH
 
 branch_taken_unsafe:
 	mov	r2, r2, lsl #24
-	orr	tmp1, r1, r2, asr #16
-        DISPATCH_START_REG	tmp1
-  USEC	cmp	tmp1, #0
+	orr	r2, r1, r2, asr #16
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
   USEC	ble	do_backedge
 	DISPATCH_FINISH
 
 branch_taken_unsafe_1:
 	add	jpc, jpc, #1
-	orr	tmp1, ip, r1, lsl #8
-        DISPATCH_START_REG	tmp1
-  USEC	cmp	tmp1, #0
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
   USEC	ble	do_backedge
 	DISPATCH_FINISH
 
 branch_taken_unsafe_2:
 	add	jpc, jpc, #2
-	orr	tmp1, ip, r1, lsl #8
-        DISPATCH_START_REG	tmp1
-  USEC	cmp	tmp1, #0
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
   USEC	ble	do_backedge
 	DISPATCH_FINISH
 
 branch_taken_unsafe_3:
 	add	jpc, jpc, #3
-	orr	tmp1, ip, r1, lsl #8
-        DISPATCH_START_REG	tmp1
-  USEC	cmp	tmp1, #0
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
   USEC	ble	do_backedge
 	DISPATCH_FINISH
 
 branch_taken_unsafe_4:
 	add	jpc, jpc, #4
-	orr	tmp1, ip, r1, lsl #8
-        DISPATCH_START_REG	tmp1
-  USEC	cmp	tmp1, #0
+	orr	r2, ip, r1, lsl #8
+        DISPATCH_START_REG	r2
+  USEC	cmp	r2, #0
   USEC	ble	do_backedge
 	DISPATCH_FINISH
 
 do_backedge:
-  USEC	ldr	tmp2, [istate, #ISTATE_METHOD]
+  USEC	ldr	tmp1, [istate, #ISTATE_METHOD]
   OSR	ldr	lr, [dispatch, #InterpreterInvocationLimit_Address-XXX]
-  USEC	ldr	r1, [tmp2, #METHOD_BACKEDGECOUNTER]
-  USEC	ldr	ip, [tmp2, #METHOD_INVOCATIONCOUNTER]
+  USEC	ldr	r1, [tmp1, #METHOD_BACKEDGECOUNTER]
+  USEC	ldr	ip, [tmp1, #METHOD_INVOCATIONCOUNTER]
   USEC	add	r1, r1, #INVOCATIONCOUNTER_COUNTINCREMENT
   OSR	ldr	lr, [lr]
   USEC	add	ip, ip, #INVOCATIONCOUNTER_COUNTINCREMENT
-  USEC	str	r1, [tmp2, #METHOD_BACKEDGECOUNTER]
-#ifdef THUMB2EE
+  USEC	str	r1, [tmp1, #METHOD_BACKEDGECOUNTER]
+#ifdef T2JIT
   OSR	cmp	r1, lr
 #else
   OSR	cmp	r1, lr, lsl #2
 #endif
-  USEC	str	ip, [tmp2, #METHOD_INVOCATIONCOUNTER]
+  USEC	str	ip, [tmp1, #METHOD_INVOCATIONCOUNTER]
   OSR	bcs	do_osr
 
 osr_continue:
@@ -1613,7 +1640,7 @@
 do_synchronize:
 	DECACHE_JPC
 	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	Helper_SafePoint
 	CACHE_CP
 	CACHE_JPC
@@ -1623,12 +1650,12 @@
 
 #ifdef ON_STACK_REPLACEMENT
 
-#ifdef THUMB2EE
+#ifdef T2JIT
 do_osr:
-	ldr	r3, [tmp2, #METHOD_CONSTMETHOD]
+	ldr	r3, [tmp1, #METHOD_CONSTMETHOD]
 	DECACHE_JPC
 	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	sub	r1, jpc, r3
 	sub	r1, r1, #CONSTMETHOD_CODEOFFSET
 	bl	FREQ_COUNT_OVERFLOW
@@ -1648,27 +1675,26 @@
 	cmp	ip, #0
 	beq	osr_continue
 
-	ldr	r3, [tmp2, #METHOD_CONSTMETHOD]
+	ldr	r3, [tmp1, #METHOD_CONSTMETHOD]
 	DECACHE_JPC
 	ldrh	r3, [r3, #CONSTMETHOD_CODESIZE]
 	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
-	sub	r1, jpc, tmp1
+	mov	r0, thread
+	sub	r1, jpc, r2
 	cmp	r3, #MAX_FG_METHOD_SIZE
 	bcc	1f
-	ldr	tmp2, [dispatch, #BackgroundCompilation_Address-XXX]
+	ldr	tmp1, [dispatch, #BackgroundCompilation_Address-XXX]
 	mov	r3, #1
-	ldr	r5, [tmp2]
-	str	r3, [tmp2]
+	ldr	r5, [tmp1]
+	str	r3, [tmp1]
 	bl	FREQ_COUNT_OVERFLOW
-	str	r5, [tmp2]
+	str	r5, [tmp1]
 	b	2f
 1:
 	bl	FREQ_COUNT_OVERFLOW
 2:
-	ldr	r3, [istate, #ISTATE_THREAD]
 	CACHE_CP
-	ldr	r1, [r3, #THREAD_PENDING_EXC]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
 	CACHE_JPC
 	cmp	r1, #0
 	bne	handle_exception
@@ -1683,20 +1709,19 @@
 
 osr_migrate:
 	ldr	tmp1, [r0, #128]	@ osr_method->osr_entry()
-	mov	r0, r3
+	mov	r0, thread
 	bl	_ZN13SharedRuntime19OSR_migration_beginEP10JavaThread
 	mov	r1, r0
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldrh	lr, [r0, #METHOD_MAXLOCALS]
 	ldrh	ip, [r0, #METHOD_SIZEOFPARAMETERS]
-	ldr	r3, [istate, #ISTATE_THREAD]
 	sub	lr, lr, ip
-	ldr	r2, [r3, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
 	add	ip, r2, #4
 	ldr	r2, [r2]
 	add	ip, ip, lr, lsl #2
-	str	r2, [r3, #THREAD_TOP_ZERO_FRAME]
-	str	ip, [r3, #THREAD_JAVA_SP]
+	str	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	ip, [thread, #THREAD_JAVA_SP]
 	mov	r2, tmp1
 @ r0 = method
 @ r1 = osr_buf
@@ -1707,7 +1732,7 @@
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 
-#endif // THUMB2EE
+#endif // T2JIT
 
 #endif // ON_STACK_REPLACEMENT
 
@@ -1715,7 +1740,7 @@
 	Opcode	ifnull
         POP     r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, #0
 	beq	branch_taken
 	DISPATCH	3
@@ -1724,7 +1749,7 @@
 	Opcode	ifnonnull
         POP     r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, #0
 	bne	branch_taken
 	DISPATCH	3
@@ -1732,7 +1757,7 @@
 	Opcode	iflt
         POP     r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, #0
 	blt	branch_taken
 	DISPATCH	3
@@ -1740,7 +1765,7 @@
 	Opcode	ifge
         POP     r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, #0
 	bge	branch_taken
 	DISPATCH	3
@@ -1748,7 +1773,7 @@
 	Opcode	ifgt
         POP     r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, #0
 	bgt	branch_taken
 	DISPATCH	3
@@ -1756,7 +1781,7 @@
 	Opcode	ifle
         POP     r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, #0
 	ble	branch_taken
 	DISPATCH	3
@@ -1765,7 +1790,7 @@
 	Opcode	if_acmpeq
         POP    r2, r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, r2
 	beq	branch_taken
 	DISPATCH	3
@@ -1774,7 +1799,7 @@
 	Opcode	if_acmpne
         POP    r2, r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, r2
 	bne	branch_taken
 	DISPATCH	3
@@ -1782,7 +1807,7 @@
 	Opcode	if_icmplt
         POP    r2, r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, r2
 	blt	branch_taken
 	DISPATCH	3
@@ -1790,7 +1815,7 @@
 	Opcode	if_icmpge
         POP    r2, r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, r2
 	bge	branch_taken
 	DISPATCH	3
@@ -1798,7 +1823,7 @@
 	Opcode	if_icmpgt
         POP    r2, r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, r2
 	bgt	branch_taken
 	DISPATCH	3
@@ -1806,7 +1831,7 @@
 	Opcode	if_icmple
         POP    r2, r3
         ldrsb   r1, [jpc, #1]
-        ldrb    tmp1, [jpc, #2]
+        ldrb    r2, [jpc, #2]
         cmp     r3, r2
 	ble	branch_taken
 	DISPATCH	3
@@ -1823,7 +1848,7 @@
 	bne	handle_return
 	DECACHE_JPC
 	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	Helper_SafePoint
 	CACHE_JPC
 	cmp	r0, #0
@@ -1833,22 +1858,23 @@
 resolve_get_put:
 	mov	r1, r0
 	mov	tmp1, lr
-        ldr     r0, [istate, #ISTATE_THREAD]
+	@ stmfd	arm_sp!, {lr}
+	mov	r0, thread
 	DECACHE_JPC
         DECACHE_STACK
        	bl      _ZN18InterpreterRuntime15resolve_get_putEP10JavaThreadN9Bytecodes4CodeE
-        ldr     r0, [istate, #ISTATE_THREAD]
+        ldr     r3, [thread, #THREAD_PENDING_EXC]
 	CACHE_JPC
-        ldr     r3, [r0, #THREAD_PENDING_EXC]
 	CACHE_CP
         cmp     r3, #0
 	mov	lr, tmp1
+	@ ldmfd	arm_sp!, {lr}
         bne     getfield_exception
 @ Now restart the getfield
         ldrb    r3, [jpc, #1]
         ldrb    r2, [jpc, #2]
         orr     r3, r3, r2, lsl #8      @ r3 = index
-	add	tmp2, constpool, r3, lsl #4	@ tmp2 = cache
+	add	tmp1, constpool, r3, lsl #4	@ tmp1 = cache
 	bx	lr
 
 accessor_non_w:
@@ -1875,29 +1901,29 @@
 	add	r0, r3, r1
 	ldm	r0, {r0, r1}
 	sub	ip, ip, #4
-	str	ip, [r2, #THREAD_JAVA_SP]
+	str	ip, [thread, #THREAD_JAVA_SP]
 	stmia	ip, {r0, r1}
 	mov	r0, #0	@ deoptimized_frames = 0
 	bx	lr
 
 	Opcode	getfield
 	ldrb	r1, [jpc, #2]
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
-	ldr	r3, [tmp2, #CP_OFFSET]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+	ldr	r3, [tmp1, #CP_OFFSET]
         and     r3, r3, #0x00ff0000
         cmp     r3, #opc_getfield << 16
         blne    resolve_get_put
-  NFBC	POP	tmp1
-        ldr     r2, [tmp2, #CP_OFFSET+12]
-  NFBC	cmp	tmp1, #0
+  NFBC	POP	r3
+        ldr     r2, [tmp1, #CP_OFFSET+12]
+  NFBC	cmp	r3, #0
   NFBC	beq	null_ptr_exception
-  NFBC	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+  NFBC	ldr	tmp1, [tmp1, #CP_OFFSET+8]
         movs    r2, r2, lsr #29
   FBC	movhi	r0, #opc_igetfield
         bls     getfield_non_w
-  NFBC	ldr	tmp2, [tmp1, tmp2]
-  NFBC	PUSH	tmp2
+  NFBC	ldr	tmp1, [r3, tmp1]
+  NFBC	PUSH	tmp1
   NFBC	DISPATCH 3
 
 #ifdef FAST_BYTECODES
@@ -1927,34 +1953,34 @@
 	b	rewrite_bytecode
 #else
 getfield_sh:
-	ldrsh	tmp2, [tmp1, tmp2]
-	PUSH	tmp2
+	ldrsh	tmp1, [r3, tmp1]
+	PUSH	tmp1
 	DISPATCH 3
 getfield_h:
-	ldrh	tmp2, [tmp1, tmp2]
-	PUSH	tmp2
+	ldrh	tmp1, [r3, tmp1]
+	PUSH	tmp1
 	DISPATCH 3
 getfield_sb:
-	ldrsb	tmp2, [tmp1, tmp2]
-	PUSH	tmp2
+	ldrsb	tmp1, [r3, tmp1]
+	PUSH	tmp1
 	DISPATCH 3
 getfield_dw:
-	add	tmp1, tmp1, tmp2
-	ldm	tmp1, {r2, tmp2}
-	PUSH	r2, tmp2
+	add	r3, r3, tmp1
+	ldm	r3, {r2, tmp1}
+	PUSH	r2, tmp1
 	DISPATCH 3
 #endif
 
 	Opcode	putfield
 	ldrb	r1, [jpc, #2]
-        add     tmp2, constpool, r1, lsl #12
-	add	tmp2, tmp2, r2, lsl #4
-	ldr	r3, [tmp2, #CP_OFFSET]
+        add     tmp1, constpool, r1, lsl #12
+	add	tmp1, tmp1, r2, lsl #4
+	ldr	r3, [tmp1, #CP_OFFSET]
         and     r3, r3, #0xff000000
         cmp     r3, #opc_putfield << 24
         blne    resolve_get_put
-        ldr     r2, [tmp2, #CP_OFFSET+12]
-  NFBC	ldr	tmp2, [tmp2, #CP_OFFSET+8]
+        ldr     r2, [tmp1, #CP_OFFSET+12]
+  NFBC	ldr	tmp1, [tmp1, #CP_OFFSET+8]
         movs    r2, r2, lsr #29
 	bls	putfield_non_w
   FBC   mov	r0, #opc_iputfield
@@ -1965,7 +1991,7 @@
   NFBC	POP	r2, r3
   NFBC	cmp	r3, #0
   NFBC	beq	null_ptr_exception
-  NFBC	str	r2, [r3, tmp2]
+  NFBC	str	r2, [r3, tmp1]
   NFBC	DISPATCH 3
 
 putfield_non_w:
@@ -1991,26 +2017,26 @@
 	POP	r2, r3
 	cmp	r3, #0
 	beq	null_ptr_exception
-	strh	r2, [r3, tmp2]
+	strh	r2, [r3, tmp1]
 	DISPATCH 3
 putfield_sb:
 	POP	r2, r3
 	cmp	r3, #0
 	beq	null_ptr_exception
-	strb	r2, [r3, tmp2]
+	strb	r2, [r3, tmp1]
 	DISPATCH 3
 putfield_dw:
 	POP	r2, r3, lr
 	cmp	lr, #0
 	beq	null_ptr_exception
-	add	tmp2, lr, tmp2
-	stm	tmp2, {r2, r3}
+	add	tmp1, lr, tmp1
+	stm	tmp1, {r2, r3}
 	DISPATCH 3
 putfield_a:
 	POP	r2, r3
 	cmp	r3, #0
 	beq	null_ptr_exception
-	str	r2, [r3, tmp2]
+	str	r2, [r3, tmp1]
 	mov	r0, r3
 	bl	Helper_aputfield
 	DISPATCH 3
@@ -2018,167 +2044,159 @@
 
 getstatic_sh:
 	DISPATCH_START	3
-	ldrsh	tmp2, [r3, lr]
+	ldrsh	tmp1, [r3, lr]
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	tmp1
 	DISPATCH_FINISH
 getstatic_h:
 	DISPATCH_START	3
-	ldrh	tmp2, [r3, lr]
+	ldrh	tmp1, [r3, lr]
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	tmp1
 	DISPATCH_FINISH
 getstatic_sb:
 	DISPATCH_START	3
-	ldrsb	tmp2, [r3, lr]
+	ldrsb	tmp1, [r3, lr]
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	tmp1
 	DISPATCH_FINISH
 getstatic_dw:
 	DISPATCH_START	3
 	add	r3, r3, lr
-	ldm	r3, {r2, tmp2}
+	ldm	r3, {r2, tmp1}
 	DISPATCH_NEXT
-	PUSH	r2, tmp2
+	PUSH	r2, tmp1
 	DISPATCH_FINISH
 getstatic_w:
 	DISPATCH_START	3
-	ldr	tmp2, [r3, lr]
+	ldr	tmp1, [r3, lr]
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	tmp1
 	DISPATCH_FINISH
 
 putstatic_sh:
 putstatic_h:
 	DISPATCH_START	3
-	POP	tmp2
+	POP	tmp1
 	DISPATCH_NEXT
-	strh	tmp2, [r3, r2]
+	strh	tmp1, [r3, r2]
 	DISPATCH_FINISH
 putstatic_w:
 	cmp	lr, #tos_atos >> 1	@ >> 1 due to lsr #29 above
 	beq	putstatic_a
 	DISPATCH_START	3
-	POP	tmp2
+	POP	tmp1
 	DISPATCH_NEXT
-	str	tmp2, [r3, r2]
+	str	tmp1, [r3, r2]
 	DISPATCH_FINISH
 putstatic_sb:
 	DISPATCH_START	3
-	POP	tmp2
+	POP	tmp1
 	DISPATCH_NEXT
-	strb	tmp2, [r3, r2]
+	strb	tmp1, [r3, r2]
 	DISPATCH_FINISH
 putstatic_dw:
 	DISPATCH_START	3
 	add	r2, r2, r3
-	POP	r3, tmp2
+	POP	r3, tmp1
 	DISPATCH_NEXT
-	stm	r2, {r3, tmp2}
+	stm	r2, {r3, tmp1}
 	DISPATCH_FINISH
 putstatic_a:
-	POP	tmp2
-	str	tmp2, [r3, r2]
+	POP	tmp1
+	str	tmp1, [r3, r2]
 	mov	r0, r3
 	bl	Helper_aputfield
 	DISPATCH 3
 
 getstatic_volatile_sh:
 	DISPATCH_START	3
-	ldrsh	tmp2, [r3, lr]
+	ldrsh	tmp1, [r3, lr]
 	FullBarrier
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	tmp1
 	DISPATCH_FINISH
 getstatic_volatile_h:
 	DISPATCH_START	3
-	ldrh	tmp2, [r3, lr]
+	ldrh	tmp1, [r3, lr]
 	FullBarrier
 	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	tmp1
 	DISPATCH_FINISH
 getstatic_volatile_sb:
 	DISPATCH_START	3
-	ldrsb	tmp2, [r3, lr]
-	FullBarrier
-	DISPATCH_NEXT
-	PUSH	tmp2
-	DISPATCH_FINISH
-getstatic_volatile_dw:
-	DISPATCH_START	3
-	add	r3, r3, lr
-#ifndef	__ARM_ARCH_7A__
-	ldm	r3, {r2, tmp2}
+	ldrsb	tmp1, [r3, lr]
 	FullBarrier
 	DISPATCH_NEXT
-	PUSH	r2, tmp2
+	PUSH	tmp1
+	DISPATCH_FINISH
+getstatic_volatile_dw:
+	add	r3, r3, lr
+#ifndef	__ARM_ARCH_7A__
+	ldm	r3, {r2, tmp1}
+	FullBarrier
+	PUSH	r2, tmp1
 #else
-0:	ldrexd	tmp2, tmp1 , [r3]
-	strexd	r2 , tmp2, tmp1, [r3]
-	teq	r2, #0
-	bne	0b
+	ldrexd	r0, r1, [r3]
+	FullBarrier
+	PUSH	r0, r1
+#endif
+	DISPATCH	3
+getstatic_volatile_w:
+	DISPATCH_START	3
+	ldr	tmp1, [r3, lr]
 	FullBarrier
 	DISPATCH_NEXT
-	PUSH	tmp2, tmp1
-#endif
-	DISPATCH_FINISH
-getstatic_volatile_w:
-	DISPATCH_START	3
-	ldr	tmp2, [r3, lr]
-	FullBarrier
-	DISPATCH_NEXT
-	PUSH	tmp2
+	PUSH	tmp1
 	DISPATCH_FINISH
 
 putstatic_volatile_sh:
 putstatic_volatile_h:
 	DISPATCH_START	3
-	POP	tmp2
+	POP	tmp1
 	DISPATCH_NEXT
 	StoreStoreBarrier
-	strh	tmp2, [r3, r2]
+	strh	tmp1, [r3, r2]
 	StoreLoadBarrier
 	DISPATCH_FINISH
 putstatic_volatile_w:
 	cmp	lr, #tos_atos >> 1	@ >> 1 due to lsr #29 above
 	beq	putstatic_volatile_a
 	DISPATCH_START	3
-	POP	tmp2
+	POP	tmp1
 	DISPATCH_NEXT
 	StoreStoreBarrier
-	str	tmp2, [r3, r2]
+	str	tmp1, [r3, r2]
 	StoreLoadBarrier
 	DISPATCH_FINISH
 putstatic_volatile_sb:
 	DISPATCH_START	3
-	POP	tmp2
+	POP	tmp1
 	DISPATCH_NEXT
 	StoreStoreBarrier
-	strb	tmp2, [r3, r2]
+	strb	tmp1, [r3, r2]
 	StoreLoadBarrier
 	DISPATCH_FINISH
 putstatic_volatile_dw:
-	DISPATCH_START	3
 	add	ip, r2, r3
-	POP	tmp2, tmp1
-	DISPATCH_NEXT
+	POP	r0, r1
 	StoreStoreBarrier
 #ifndef	__ARM_ARCH_7A__
-	stm	ip, {tmp2, tmp1}
+	stm	ip, {r0, r1}
 #else
-	// Data in tmp1 & tmp2, address in ip, r2 & r3 scratch
+	// Data in r0 & r1, address in ip, r2 & r3 scratch
 0:	ldrexd	r2, r3, [ip]
-	strexd	r2, tmp2, tmp1, [ip]
+	strexd	r2, r0, r1, [ip]
 	teq	r2, #0
 	bne	0b
 #endif
-	DISPATCH_NEXT
+	DISPATCH_START	3
 	StoreLoadBarrier
 	DISPATCH_FINISH
 putstatic_volatile_a:
-	POP	tmp2
+	POP	tmp1
 	StoreStoreBarrier
-	str	tmp2, [r3, r2]
+	str	tmp1, [r3, r2]
 	mov	r0, r3
 	bl	Helper_aputfield
 	DISPATCH 3
@@ -2196,13 +2214,12 @@
 	mov	r1, #opc_invokestatic
 resolve_invoke:
 	mov	tmp1, lr
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	DECACHE_JPC
 	DECACHE_STACK
 	bl	_ZN18InterpreterRuntime14resolve_invokeEP10JavaThreadN9Bytecodes4CodeE
-	ldr	r0, [istate, #ISTATE_THREAD]
 	CACHE_JPC
-	ldr	r3, [r0, #4]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
 	CACHE_CP
 	cmp	r3, #0
         ldrb    r3, [jpc, #1]
@@ -2236,19 +2253,18 @@
 	Opcode	newarray
 	ldrb	r1, [jpc, #1]	@ zero_extendqisi2
 	ldr	r2, [stack, #4]
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	DECACHE_JPC
 	DECACHE_STACK
 	bl	_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei
-	ldr	r0, [istate, #ISTATE_THREAD]
+	ldr	ip, [thread, #THREAD_PENDING_EXC]
 	CACHE_JPC
-	ldr	ip, [r0, #THREAD_PENDING_EXC]
 	CACHE_CP
 	cmp	ip, #0
-	ldr	r2, [r0, #THREAD_VM_RESULT]
+	ldr	r2, [thread, #THREAD_VM_RESULT]
 	bne	handle_exception
 	str	r2, [stack, #4]
-	str	ip, [r0, #THREAD_VM_RESULT]
+	str	ip, [thread, #THREAD_VM_RESULT]
 	DISPATCH	2
 
 	Opcode	anewarray
@@ -2260,17 +2276,16 @@
 	DECACHE_JPC
 	DECACHE_STACK
 	ldr	r1, [lr, #METHOD_CONSTANTS]
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	_ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii
-	ldr	r0, [istate, #ISTATE_THREAD]
+	ldr	ip, [thread, #THREAD_PENDING_EXC]
 	CACHE_JPC
-	ldr	ip, [r0, #THREAD_PENDING_EXC]
 	CACHE_CP
 	cmp	ip, #0
-	ldr	r2, [r0, #THREAD_VM_RESULT]
+	ldr	r2, [thread, #THREAD_VM_RESULT]
 	bne	handle_exception
 	str	r2, [stack, #4]
-	str	ip, [r0, #THREAD_VM_RESULT]
+	str	ip, [thread, #THREAD_VM_RESULT]
 	DISPATCH	3
 
 	Opcode	arraylength
@@ -2292,7 +2307,7 @@
 	cmp	r1, #0
 	beq	null_ptr_exception
 	mov	r2, #0
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	mov	r3, r2
 	bl	_ZN12ThreadShadow21set_pending_exceptionEP7oopDescPKci
 	b	handle_exception
@@ -2336,193 +2351,29 @@
 	DISPATCH 3
 
 	Opcode	monitorenter
-	ldr	r1, [stack, #4]
-	cmp	r1, #0
-	beq	.monitorenter_4
-	ldr	r0, [istate, #ISTATE_MONITOR_BASE]
-	ldr	r2, [istate, #ISTATE_STACK_BASE]
-	cmp	r2, r0
-	beq	.monitorenter_5
-	mov	sl, #0
-.monitorenter_1:
-	ldr	r3, [r2, #4]
-	cmp	r3, #0
-	moveq	sl, r2
-	beq	.monitorenter_2
-	cmp	r1, r3
-	beq	.monitorenter_3
-.monitorenter_2:
-	add	r2, r2, #8
-	cmp	r2, r0
-	bne	.monitorenter_1
-.monitorenter_3:
-	cmp	sl, #0
-	beq	.monitorenter_5
-	str	r1, [sl, #4]
-	ldr	r3, [r1, #0]
-	mov	r0, sl
-	orr	tmp1, r3, #1
-	mov	r2, tmp1
-	str	tmp1, [sl, #0]
-	bl	cmpxchg_ptr
-	cmp	r0, tmp1
-	beq	.monitorenter_exit
-	ldr	r0, [istate, #ISTATE_THREAD]
-	bic	r1, tmp1, #3
-	bl	JavaThread_is_lock_owned
+	mov	r0, r8
+	POP	r1
+ 	DECACHE_JPC
+ 	DECACHE_STACK
+	bl	Helper_monitorenter
+	CACHE_JPC
+	CACHE_CP
+	CACHE_STACK		@ monitorenter may expand stack!!!	
 	cmp	r0, #0
-	movne	lr, #0
-	strne	lr, [sl, #0]
-	bne	.monitorenter_exit
-	mov	r1, sl
-	DECACHE_JPC
-	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
-	bl	_ZN18InterpreterRuntime12monitorenterEP10JavaThreadP15BasicObjectLock
-	ldr	r0, [istate, #ISTATE_THREAD]
-	CACHE_JPC
-	ldr	r3, [r0, #THREAD_PENDING_EXC]
-	cmp	r3, #0
-	beq	.monitorenter_exit
-	b	handle_exception
-.monitorenter_4:
-	b	null_ptr_exception
-.monitorenter_5:
-	mov	lr, #0
+	bne	handle_exception
+	DISPATCH 1
+
+	Opcode	monitorexit
+	mov	r0, r8
+	POP	r1
 	DECACHE_JPC
 	DECACHE_STACK
-	ldr	r3, [istate, #ISTATE_THREAD]
-	ldr	ip, [istate, #ISTATE_THREAD]
-	ldr	r1, [r3, #THREAD_JAVA_SP]
-	ldr	r3, [r3, #THREAD_JAVA_STACK_BASE]
-	str	lr, [ip, #THREAD_LAST_JAVA_SP]
-	rsb	r3, r3, r1
-	cmp	r3, #7
-	ble	vm_fatal_error
-	sub	r3, r1, #8
-	ldr	r1, [istate, #ISTATE_THREAD]
-	str	r3, [r1, #THREAD_JAVA_SP]
-	ldr	r2, [istate, #ISTATE_STACK]
-	ldr	r1, [istate, #ISTATE_STACK_BASE]
-	add	r3, r2, #4
-	cmp	r1, r3
-	bls	.monitorenter_7
-	add	r0, r2, #8
-.monitorenter_6:
-	ldr	r3, [r0, #-4]
-	mov	r2, r0
-	str	r3, [r0, #-12]
-	add	r0, r0, #4
-	ldr	r1, [istate, #ISTATE_STACK_BASE]
-	cmp	r1, r2
-	bhi	.monitorenter_6
-	ldr	r2, [istate, #ISTATE_STACK]
-.monitorenter_7:
-	sub	r1, r1, #8
-	sub	r2, r2, #8
-	ldr	r3, [istate, #ISTATE_STACK_LIMIT]
-	str	r1, [istate, #ISTATE_STACK_BASE]
-	str	r2, [istate, #ISTATE_STACK]
-	sub	r3, r3, #8
-	mov	r0, #0
-	str	r3, [istate, #ISTATE_STACK_LIMIT]
-	str	r0, [r1, #4]
-	ldr	r1, [istate, #ISTATE_THREAD]
-
-	str	r0, [r1, #THREAD_LAST_JAVA_SP]  @ set SP to zero before setting FP
-	ldr	r3, [r1, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [r1, #THREAD_LAST_JAVA_FP]
-	ldr	r3, [r1, #THREAD_JAVA_SP]	
-	str	r3, [r1, #THREAD_LAST_JAVA_SP]
-
-	CACHE_STACK
-	ldr	sl, [istate, #ISTATE_STACK_BASE]
-	ldr	r3, [stack, #4]
+	bl	Helper_monitorexit
 	CACHE_JPC
-	mov	r1, r3
-	str	r3, [sl, #4]
-	ldr	r2, [r3, #0]
-	mov	r0, sl
-	orr	tmp1, r2, #1
-	mov	r2, tmp1
-	str	tmp1, [sl, #0]
-	bl	cmpxchg_ptr
-	cmp	r0, tmp1
-	beq	.monitorenter_exit
-	ldr	r0, [istate, #ISTATE_THREAD]
-	bic	r1, tmp1, #3
-	bl	JavaThread_is_lock_owned
+	CACHE_CP
 	cmp	r0, #0
-	movne	ip, #0
-	strne	ip, [sl, #0]
-	bne	.monitorenter_exit
-	mov	r1, sl
-	ldr	r0, [istate, #ISTATE_THREAD]
-	DECACHE_JPC
-	DECACHE_STACK
-	bl	_ZN18InterpreterRuntime12monitorenterEP10JavaThreadP15BasicObjectLock
-	ldr	r0, [istate, #ISTATE_THREAD]
-	CACHE_JPC
-	ldr	r3, [r0, #THREAD_PENDING_EXC]
-	cmp	r3, #0
-	beq	.monitorenter_exit
-	b	handle_exception
-.monitorenter_exit:
-	CACHE_CP
-	add	stack, stack, #4
-	DISPATCH	1
-
-	Opcode	monitorexit
-	ldr	sl, [stack, #4]
-	cmp	sl, #0
-	beq	.monitorexit_3
-	ldr	r2, [istate, #ISTATE_MONITOR_BASE]
-	ldr	tmp1, [istate, #ISTATE_STACK_BASE]
-	cmp	tmp1, r2
-	bne	.monitorexit_2
-	b	.monitorexit_4
-.monitorexit_1:
-	add	tmp1, tmp1, #8
-	cmp	tmp1, r2
-	beq	.monitorexit_4
-.monitorexit_2:
-	ldr	r3, [tmp1, #4]
-	cmp	sl, r3
-	bne	.monitorexit_1
-	ldr	r0, [tmp1, #0]
-	mov	lr, #0
-	cmp	r0, lr
-	str	lr, [tmp1, #4]
-	beq	.monitorexit_exit
-	mov	r1, sl
-	mov	r2, tmp1
-	bl	cmpxchg_ptr
-	cmp	tmp1, r0
-	beq	.monitorexit_exit
-	str	sl, [tmp1, #4]
-	mov	r1, tmp1
-	DECACHE_JPC
-	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
-	bl	_ZN18InterpreterRuntime11monitorexitEP10JavaThreadP15BasicObjectLock
-	ldr	r0, [istate, #ISTATE_THREAD]
-	ldr	r3, [r0, #THREAD_PENDING_EXC]
-	CACHE_JPC
-	cmp	r3, #0
-	beq	.monitorexit_exit
-	b	handle_exception
-.monitorexit_3:
-	b	null_ptr_exception
-.monitorexit_4:
-	ldr	r0, [istate, #ISTATE_THREAD]
-	DECACHE_JPC
-	DECACHE_STACK
-	bl	_ZN18InterpreterRuntime37throw_illegal_monitor_state_exceptionEP10JavaThread
-	b	handle_exception_with_bcp
-.monitorexit_exit:
-	add	stack, stack, #4
-	CACHE_CP
-	DISPATCH	1
+	bne	handle_exception
+	DISPATCH 1
 
 	ALIGN_CODE
 vm_fatal_error:
@@ -2621,10 +2472,10 @@
 	DISPATCH	4
 do_wide_ret:
 	ldr	r2, [istate, #ISTATE_METHOD]
-	ldr	r2, [r2, #8]
+	ldr	r2, [r2, #METHOD_CONSTMETHOD]
 	ldr	r1, [locals, -r1, lsl #2]
 	add	jpc, r2, r1
-	DISPATCH	48
+	DISPATCH	CONSTMETHOD_CODEOFFSET
 do_wide_iinc:
 	ldrsb	r2, [jpc, #4]
 	ldrb	r3, [jpc, #5]
@@ -2636,28 +2487,27 @@
 
 	Opcode	multianewarray
 	ldrb	tmp1, [jpc, #3]	@ zero_extendqisi2
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	add	r1, stack, tmp1, lsl #2
 	DECACHE_JPC
 	DECACHE_STACK
 	bl	_ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi
-	ldr	r0, [istate, #ISTATE_THREAD]
 	CACHE_JPC
-	ldr	r1, [r0, #THREAD_PENDING_EXC]
+	ldr	r1, [thread, #THREAD_PENDING_EXC]
 	CACHE_CP
 	cmp	r1, #0
-	ldr	r3, [r0, #THREAD_VM_RESULT]
+	ldr	r3, [thread, #THREAD_VM_RESULT]
 	bne	handle_exception
 	str	r3, [stack, tmp1, asl #2]!
-	str	r1, [r0, #THREAD_VM_RESULT]
+	str	r1, [thread, #THREAD_VM_RESULT]
 	sub	stack, stack, #4
 	DISPATCH	4
 
 	Opcode	jsr_w
 	ldr	r3, [istate, #ISTATE_METHOD]
-	ldr	r1, [r3, #8]
+	ldr	r1, [r3, #METHOD_CONSTMETHOD]
 	rsb	r2, r1, jpc
-	sub	r2, r2, #43
+	sub	r2, r2, #CONSTMETHOD_CODEOFFSET - 5
 	str	r2, [stack], #-4
 	b	do_goto_w
 
@@ -2680,7 +2530,7 @@
 	bne	1f
 	DECACHE_JPC
 	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	Helper_SafePoint
 	CACHE_JPC
 	CACHE_CP
@@ -2693,20 +2543,19 @@
 	mov	r2, jpc
 	DECACHE_STACK
 	DECACHE_JPC
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	ldr	r1, [istate, #ISTATE_METHOD]
 	bl	_ZN18InterpreterRuntime24get_original_bytecode_atEP10JavaThreadP13methodOopDescPh
 	mov	tmp1, r0
-	ldr	r0, [istate, #ISTATE_THREAD]
-	ldr	r3, [r0, #THREAD_PENDING_EXC]
+	mov	r0, thread
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
 	cmp	r3, #0
 	bne	handle_exception
 	ldr	r2, [istate, #ISTATE_BCP]
 	ldr	r1, [istate, #ISTATE_METHOD]
 	bl	_ZN18InterpreterRuntime11_breakpointEP10JavaThreadP13methodOopDescPh
-	ldr	r0, [istate, #ISTATE_THREAD]
 	CACHE_JPC
-	ldr	r3, [r0, #THREAD_PENDING_EXC]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
 	CACHE_CP
 	cmp	r3, #0
 	and	r0, tmp1, #255
@@ -2776,44 +2625,48 @@
 	beq	handle_return
 	DECACHE_JPC
 	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	_ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc
-	ldr	r0, [istate, #ISTATE_THREAD]
 	CACHE_JPC
-	ldr	r3, [r0, #THREAD_PENDING_EXC]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
-@ CACHE_LOCALS & CACHE_CP not require for handle_retuen / handle_exception
+@ CACHE_LOCALS & CACHE_CP not required for handle_return / handle_exception
 	cmp	r3, #0
 	beq	handle_return
 	b	handle_exception
 
-	ALIGN_CODE
+// This code is logically part of normal_entry_synchronized, but it's
+// been moved out because there is only a FAST_ENTRY_OFFSET sized gap
+// here.
+
+.normal_entry_return_synchronized:
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {regset, pc}
+	SLOW_ENTRY
 normal_entry_synchronized:
 	stmfd	arm_sp!, {regset, lr}
-	bl	fast_normal_entry_synchronized
-	mov	r0, #0	@ deoptimized_frames = 0
-	ldmfd	arm_sp!, {regset, pc}
-
-	ALIGN_CODE
+	mov	thread, r2
+	ldr	r7, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
+	rsb	r3, r7, r3
+	rsb	r3, r3, arm_sp
+	cmp	r3, #32768
+	blt	stack_overflow_no_frame
+	add	lr, pc, #(.normal_entry_return_synchronized-(.fast_normal_entry1+4))
+.fast_normal_entry1:
+
+	FAST_ENTRY
 fast_normal_entry_synchronized:
 	stmfd	arm_sp!, {fast_regset, lr}
 
-	mov	sl, r0
-	mov	tmp1, r2
-
-	ldr	r0, [tmp1, #THREAD_STACK_SIZE]
-	ldr	r3, [tmp1, #THREAD_STACK_BASE]
-	rsb	r3, r0, r3
-	rsb	r3, r3, arm_sp
-	cmp	r3, #32768
-	blt	stack_overflow_before_frame
-
-	ldrh	r2, [sl, #METHOD_MAXLOCALS]
-	ldrh	r3, [sl, #METHOD_SIZEOFPARAMETERS]
+	mov	tmp1, r0
+
+	ldrh	r2, [tmp1, #METHOD_MAXLOCALS]
+	ldrh	r3, [tmp1, #METHOD_SIZEOFPARAMETERS]
 	rsb	r8, r3, r2
 
-	ldr	r1, [tmp1, #THREAD_JAVA_SP]
-	ldrh	r0, [sl, #METHOD_MAXSTACK]
-	ldr	r3, [tmp1, #THREAD_JAVA_STACK_BASE]
+	ldr	r1, [thread, #THREAD_JAVA_SP]
+	ldrh	r0, [tmp1, #METHOD_MAXSTACK]
+	ldr	r3, [thread, #THREAD_JAVA_STACK_BASE]
 
 	sub	r5, r1, r8, lsl #2
 	sub	r5, r5, #FRAME_SIZE+STACK_SPARE+LEAF_STACK_SIZE
@@ -2829,32 +2682,25 @@
 	subs	r8, r8, #1
 	str	r2, [r1, #-4]!
 	bgt	.zero_locals_synchronized
-	str	r1, [tmp1, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_JAVA_SP]
 
 .normal_entry_synchronized_no_locals:
-	mov	r2, tmp1
-	mov	r1, sl
-	add	r0, tmp1, #THREAD_JAVA_STACK_BASE
+	mov	r2, thread
+	mov	r1, tmp1
+	add	r0, thread, #THREAD_JAVA_STACK_BASE
 	bl	build_frame
-	mov	tmp_vvv, r0
-	ldr	r3, [tmp1, #THREAD_TOP_ZERO_FRAME]
-	sub	r0, r0, #72	@ This mysterious constant is actually the offset of
-				@ the next frame field.  Why is "ISTATE_NEXT_FRAME"
-				@ not used here?
-	mov	istate, r0
-	str	r3, [tmp_vvv, #0]
+	ldr	ip, [thread, #THREAD_TOP_ZERO_FRAME]
+	sub	istate, r0, #ISTATE_NEXT_FRAME
+	mov	r2, #0  @ set SP to zero before setting FP
+	str	r0, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	str	r0, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	r3, [thread, #THREAD_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	str	ip, [istate, #ISTATE_NEXT_FRAME]
 	adrl	ip, dispatch_init_adcon
-	ldr	r1, [tmp1, #THREAD_JAVA_SP]
-	str	r1, [tmp1, #THREAD_LAST_JAVA_SP]
 	ldm	ip, {r0, r1}
 	add	r0, r0, ip
-	str	tmp_vvv, [tmp1, #THREAD_TOP_ZERO_FRAME]
-@	CACHE_JPC
-	mov	r2, #0  @ set SP to zero before setting FP
-	str	r2, [tmp1, #THREAD_LAST_JAVA_SP]
-	str	tmp_vvv, [tmp1, #THREAD_LAST_JAVA_FP]
-	ldr	r3, [tmp1, #THREAD_JAVA_SP]
-	str	r3, [tmp1, #THREAD_LAST_JAVA_SP]
 	add	dispatch, r1, r0
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [r0, #METHOD_ACCESSFLAGS]
@@ -2866,20 +2712,21 @@
 	CACHE_LOCALS
 	tst	r3, #JVM_ACC_STATIC
 	ldrne	r3, [r0, #METHOD_CONSTANTS]
-	ldreq	sl, [locals, #0]
+	ldreq	tmp1, [locals, #0]
 	ldrne	r2, [r3, #CONSTANTPOOL_POOL_HOLDER]
-	ldr	tmp1, [istate, #ISTATE_MONITOR_BASE]
-	ldrne	sl, [r2, #KLASS_PART + KLASS_JAVA_MIRROR]
-	ldr	r3, [sl, #0]
+	ldrne	tmp1, [r2, #KLASS_PART+KLASS_JAVA_MIRROR]
+	ldr	r3, [tmp1, #0]
 	orr	tmp_xxx, r3, #1
-	str	tmp_xxx, [tmp1, #-8]!
+	ldr	ip, [istate, #ISTATE_MONITOR_BASE]
+	str	tmp_xxx, [ip, #-8]
 .normal_do_synchronisation_2:
-	ldr	tmp_vvv, [sl, #0]
+	ldr	tmp_vvv, [tmp1, #0]
 	cmp	tmp_xxx, tmp_vvv
 	bne	.normal_do_synchronisation_3
 	mov	r0, tmp_xxx
-	mov	r1, tmp1
-	mov	r2, sl
+	ldr	r1, [istate, #ISTATE_MONITOR_BASE]
+	sub	r1, r1, #8
+	mov	r2, tmp1
 	mov	r3, #0xffffffc0
 	bic	r3, r3, #0xf000
 	blx	r3
@@ -2887,21 +2734,22 @@
 	bne	.normal_do_synchronisation_2
 	b	1f
 .normal_do_synchronisation_3:
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bic	r1, tmp_xxx, #3
 	bl	JavaThread_is_lock_owned
 	cmp	r0, #0
 	beq	.normal_do_synchronisation_4
+	ldr	ip, [istate, #ISTATE_MONITOR_BASE]
 	mov	r3, #0
-	str	r3, [tmp1]
+	str	r3, [ip, #-8]
 	b	1f
 .normal_do_synchronisation_4:
-	mov	r1, tmp1
+	ldr	r1, [istate, #ISTATE_MONITOR_BASE]
+	sub	r1, r1, #8
 	DECACHE_STACK
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	_ZN18InterpreterRuntime12monitorenterEP10JavaThreadP15BasicObjectLock
-	ldr	r0, [istate, #ISTATE_THREAD]
-	ldr	r3, [r0, #THREAD_PENDING_EXC]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
 	cmp	r3, #0
 	mov	r2, r0
 	bne	handle_exception_do_not_unlock
@@ -2924,15 +2772,15 @@
         ldr     r3, [r0, #METHOD_CONSTMETHOD]
         ldrh    r3, [r3, #CONSTMETHOD_CODESIZE]
 	mov	r1, #0
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
         cmp     r3, #MAX_FG_METHOD_SIZE
         bcc     1f
-        ldr     tmp2, [dispatch, #BackgroundCompilation_Address-XXX]
+        ldr     tmp1, [dispatch, #BackgroundCompilation_Address-XXX]
         mov     r3, #1
-        ldr     r5, [tmp2]
-        str     r3, [tmp2]
+        ldr     r5, [tmp1]
+        str     r3, [tmp1]
         bl      FREQ_COUNT_OVERFLOW
-        str     r5, [tmp2]
+        str     r5, [tmp1]
         b       2f
 1:
 	bl	FREQ_COUNT_OVERFLOW
@@ -2964,15 +2812,15 @@
 	SW_NPC	cmp	r2, #0
 	SW_NPC	beq	null_ptr_exception
 .abortentry110:
-	ldr	tmp2, [r2, #4]				@ rcvr->klass()
+	ldr	tmp1, [r2, #4]				@ rcvr->klass()
 	tst	r3, #flag_methodInterface
 	bne	.invokeinterface_methodInterface
 
 	ldr	lr, [r0, #CP_OFFSET+4]			@ lr = iclass
 
-	add	r1, tmp2, #INSTANCEKLASS_VTABLE_OFFSET
-	ldr	r2, [tmp2, #KLASS_PART+INSTANCEKLASS_VTABLE_LEN]
-	ldr	ip, [tmp2, #KLASS_PART+INSTANCEKLASS_ITABLE_LEN]
+	add	r1, tmp1, #INSTANCEKLASS_VTABLE_OFFSET
+	ldr	r2, [tmp1, #KLASS_PART+INSTANCEKLASS_VTABLE_LEN]
+	ldr	ip, [tmp1, #KLASS_PART+INSTANCEKLASS_ITABLE_LEN]
 	add	r2, r2, #1
 	bic	r2, r2, #1
 
@@ -2989,30 +2837,27 @@
 
 	ldr	r3, [r0, #CP_OFFSET+8]
 	ldr	r2, [r1, #-4]
-	add	r3, tmp2, r3, lsl #2
-	ldr	tmp2, [r3, r2]
-	cmp	tmp2, #0
+	add	r3, tmp1, r3, lsl #2
+	ldr	tmp1, [r3, r2]
+	cmp	tmp1, #0
 	beq	abstractmethod_exception
 .invokeinterface_invoke:
-	ldr	tmp1, [istate, #ISTATE_THREAD]
-@	str	tmp2, [istate, #ISTATE_CALLEE]
-	ldr	ip, [tmp2, #METHOD_FROM_INTERPRETED]
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
 	mov	r1, #0
-	str	ip, [istate, #36]
-	str	r1, [tmp1, #THREAD_LAST_JAVA_FP]
-	str	r1, [tmp1, #THREAD_LAST_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
 
 	add	stack, stack, #4
-	str	stack, [tmp1, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	ldr	r3, [ip]
 
-	mov	r0, tmp2
-	mov	r1, ip
-#ifndef SHARK
-	add	r3, r3, #CODE_ALIGN_SIZE
+	mov	r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET
 #endif
-	mov	r2, tmp1
 	blx	r3
 
 	adrl	ip, dispatch_init_adcon
@@ -3022,24 +2867,23 @@
 
 	CACHE_LOCALS
 
-	ldr	ip, [istate, #ISTATE_THREAD]
 	CACHE_JPC
-	ldr	stack, [ip, #THREAD_JAVA_SP]
+	ldr	stack, [thread, #THREAD_JAVA_SP]
 	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
 	sub	stack, stack, #4
 
-	ldr	r1, [ip, #THREAD_JAVA_SP]
+	ldr	r1, [thread, #THREAD_JAVA_SP]
 	stmfd	arm_sp!, {r1}
 	mov	r1, #0
-	str	r1, [ip, #THREAD_LAST_JAVA_SP]
-	ldr	r1, [ip, #THREAD_TOP_ZERO_FRAME]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
 	add	r2, r2, #4
-	str	r2, [ip, #THREAD_JAVA_SP]
-	str	r1, [ip, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
 	ldmfd	arm_sp!, {r1}
-	str	r1, [ip, #THREAD_LAST_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
 	DISPATCH_START	5
-	ldr	r3, [ip, #4]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	cmp	r3, #0
@@ -3051,11 +2895,11 @@
 
 .invokeinterface_methodInterface:
 	tst	r3, #flag_vfinalMethod
-	ldrne	tmp2, [r0, #CP_OFFSET+8]
+	ldrne	tmp1, [r0, #CP_OFFSET+8]
 	bne	.invokeinterface_invoke
 	ldr	r1, [r0, #CP_OFFSET+8]
-	add	r3, tmp2, r1, lsl #2
-	ldr	tmp2, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+	add	r3, tmp1, r1, lsl #2
+	ldr	tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
 	b	.invokeinterface_invoke
 
 # r2 = [jpc, #1]
@@ -3093,7 +2937,7 @@
 	ldr	r0, [r0, #CP_OFFSET+4]	// Call site
 .abortentry119:
 	ldr	r0, [r0, lr]		// Method handle
-	ldr	r1, [istate, #ISTATE_THREAD]
+	mov	r1, thread
 
 	// Call the target method
 	bl	_ZN14CppInterpreter21process_method_handleEP7oopDescP6Thread
@@ -3106,27 +2950,26 @@
 
 	CACHE_LOCALS
 
-	ldr	ip, [istate, #ISTATE_THREAD]
 	CACHE_JPC
-	ldr	stack, [ip, #THREAD_JAVA_SP]
+	ldr	stack, [thread, #THREAD_JAVA_SP]	@ thread now lives in a register; no reload from istate
 	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
 	sub	stack, stack, #4
 
 	// Fix up everything in the thread state to point to the
 	// current frame
-	ldr	r1, [ip, #THREAD_JAVA_SP]
+	ldr	r1, [thread, #THREAD_JAVA_SP]	@ parked on the native stack; stored to LAST_JAVA_SP below
 	stmfd	arm_sp!, {r1}
 	mov	r1, #0
-	str	r1, [ip, #THREAD_LAST_JAVA_SP]
-	ldr	r1, [ip, #THREAD_TOP_ZERO_FRAME]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]	@ zeroed first; the final value is written after LAST_JAVA_FP
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
 	add	r2, r2, #4
-	str	r2, [ip, #THREAD_JAVA_SP]
-	str	r1, [ip, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_JAVA_SP]	@ JAVA_SP = stack limit + 4
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]	@ LAST_JAVA_FP = top zero frame
 	ldmfd	arm_sp!, {r1}
-	str	r1, [ip, #THREAD_LAST_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
 	DISPATCH_START	5
 	// Test for an exception
-	ldr	r3, [ip, #4]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]	@ named offset, as in the other return paths (was magic #4)
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	cmp	r3, #0
@@ -3162,7 +3005,7 @@
 	ldr     r1, [r0, #CP_OFFSET+4]  // Pointer to call site
 	// Already resolved?
 	cmp     r1, #0 
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	mov	r1, #\opc
 	bleq    _ZN18InterpreterRuntime11resolve_ldcEP10JavaThreadN9Bytecodes4CodeE
 
@@ -3173,8 +3016,7 @@
 	PUSH	r1
 
 	// Test for an exception
-	ldr	ip, [istate, #ISTATE_THREAD]
-	ldr	r3, [ip, #4]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]	@ named offset instead of magic #4; thread is already in a register
 	cmp	r3, #0
 	bne	handle_exception
 	
@@ -3209,7 +3051,6 @@
 	DECACHE_JPC
 	add	r0, r2, asl #4
 	ldr	r3, [r0, #CP_OFFSET+12]
-	ldr	tmp1, [istate, #ISTATE_THREAD]
 	and	r1, r3, #255
 	ldr	r2, [stack, r1, asl #2]
 	mov	r1, #0
@@ -3218,15 +3059,13 @@
 .abortentry117:
 	HW_NPC	ldr	r3, [r2]		@ Only to provoke abort
 
-	ldr	tmp2, [r0, #CP_OFFSET+8]
-
-@	str	tmp2, [istate, #ISTATE_CALLEE]
-	ldr	ip, [tmp2, #METHOD_FROM_INTERPRETED]
-	str	r1, [tmp1, #THREAD_LAST_JAVA_SP]
-	str	ip, [istate, #36]
+	ldr	tmp1, [r0, #CP_OFFSET+8]
+
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
 
 	add	stack, stack, #4
-	str	stack, [tmp1, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	ldr	r3, [ip, #0]
 	b	normal_dispatch_and_return
@@ -3251,20 +3090,19 @@
 #else
         DECACHE_STACK
 	DECACHE_JPC
-        ldr     tmp1, [istate, #ISTATE_THREAD]
         and     r1, r3, #255
         ldr     r2, [stack, r1, asl #2]
         mov     r1, #0
         cmp     r2, #0
         beq     null_ptr_exception
 
-        ldr     tmp2, [r0, #CP_OFFSET+8]
+        ldr     tmp1, [r0, #CP_OFFSET+8]
         tst     r3, #flag_vfinalMethod
         bne     1f
 
         ldr     r3, [r2, #4]
-        add     r3, r3, tmp2, lsl #2
-        ldr     tmp2, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
 1:
 #endif // FAST_BYTECODES
 
@@ -3278,37 +3116,33 @@
 	DECACHE_JPC
         add     r0, r0, r2, asl #4
         ldr     r3, [r0, #CP_OFFSET+12]
-        ldr     tmp1, [istate, #ISTATE_THREAD]
         and     r1, r3, #255
         ldr     r2, [stack, r1, asl #2]
         mov     r1, #0
         SW_NPC	cmp     r2, #0
         SW_NPC	beq     null_ptr_exception_jpc_0
 
-        ldr     tmp2, [r0, #CP_OFFSET+8]
+        ldr     tmp1, [r0, #CP_OFFSET+8]
 .abortentry104:
         ldr     r3, [r2, #4]
-        add     r3, r3, tmp2, lsl #2
-        ldr     tmp2, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
 #endif // FAST_BYTECODES
-
-@        str     tmp2, [istate, #ISTATE_CALLEE]
-        ldr     ip, [tmp2, #METHOD_FROM_INTERPRETED]
-        str     r1, [tmp1, #THREAD_LAST_JAVA_SP]
-        str     ip, [istate, #36]
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
 
         add     stack, stack, #4
-        str     stack, [tmp1, #THREAD_JAVA_SP]
+        str     stack, [thread, #THREAD_JAVA_SP]
 
         ldr     r3, [ip, #0]
 
 normal_dispatch_and_return:
-	mov	r0, tmp2
-	mov	r1, ip
-#ifndef SHARK
-	add	r3, r3, #CODE_ALIGN_SIZE
+	mov	r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET
 #endif
-	mov	r2, tmp1
 	blx	r3
 
 	adrl	ip, dispatch_init_adcon
@@ -3318,25 +3152,25 @@
 
 	CACHE_LOCALS
 
-	ldr	ip, [istate, #ISTATE_THREAD]
 	CACHE_JPC
-	ldr	stack, [ip, #THREAD_JAVA_SP]
+	ldr	stack, [thread, #THREAD_JAVA_SP]
 	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
 	sub	stack, stack, #4
 
-	ldr	r1, [ip, #THREAD_TOP_ZERO_FRAME]
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
 	add	r2, r2, #4
 	mov	r3, #0
-	str	r3, [ip, #THREAD_LAST_JAVA_SP]
-	str	r2, [ip, #THREAD_JAVA_SP]
-	str	r1, [ip, #THREAD_LAST_JAVA_FP]
-	str	r2, [ip, #THREAD_LAST_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
 	DISPATCH_START	3
-	ldr	r3, [ip, #4]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	cmp	r3, #0
 	DISPATCH_NEXT
+
 	bne	invoke_exception_fix
 	DISPATCH_NEXT
 	CACHE_CP
@@ -3360,20 +3194,16 @@
 	DECACHE_JPC
   FBC	add	r0, r2, asl #4
 
-	ldr	tmp2, [r0, #CP_OFFSET+4]
+	ldr	tmp1, [r0, #CP_OFFSET+4]
 	mov	r1, #0
-@	str	tmp2, [istate, #ISTATE_CALLEE]
-	ldr	r3, [tmp2, #METHOD_FROM_INTERPRETED]
-	ldr	tmp1, [istate, #ISTATE_THREAD]
-	str	r3, [istate, #36]
-	str	r1, [tmp1, #THREAD_LAST_JAVA_SP]
-	str	r1, [tmp1, #THREAD_LAST_JAVA_FP]
+	ldr	r3, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
 
 	add	stack, stack, #4
-	str	stack, [tmp1, #THREAD_JAVA_SP]
-
-	ldr	ip, [istate, #36]
-	ldr	r3, [ip, #0]
+	str	stack, [thread, #THREAD_JAVA_SP]
+
+	ldr	r3, [r3, #0]
 	b	normal_dispatch_and_return
 
 
@@ -3396,7 +3226,6 @@
   FBC	add	r0, r2, asl #4
 
 	ldr	r3, [r0, #CP_OFFSET+12]
-	ldr	tmp1, [istate, #ISTATE_THREAD]
 	and	r3, r3, #255
 	ldr	r2, [stack, r3, asl #2]
 	mov	r1, #0
@@ -3405,55 +3234,55 @@
 .abortentry118:
 	HW_NPC	ldr	r3, [r2]		@ Only to provoke abort
 
-	ldr	tmp2, [r0, #CP_OFFSET+4]
-
-@	str	tmp2, [istate, #ISTATE_CALLEE]
-	ldr	ip, [tmp2, #METHOD_FROM_INTERPRETED]
-	str	r1, [tmp1, #THREAD_LAST_JAVA_SP]
-	str	ip, [istate, #36]
+	ldr	tmp1, [r0, #CP_OFFSET+4]
+
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
 
 	add	stack, stack, #4
-	str	stack, [tmp1, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	ldr	r3, [ip, #0]
 	b	normal_dispatch_and_return
 
-	ALIGN_CODE
+// This code is logically part of normal_entry, but it's been moved
+// out because there is only a FAST_ENTRY_OFFSET sized gap here.
+
+.normal_entry_return:
+	mov	r0, #0	@ deoptimized_frames = 0
+	ldmfd	arm_sp!, {regset, pc}	@ pops what normal_entry pushed; the saved lr is loaded into pc
+	SLOW_ENTRY
 normal_entry:
 	stmfd	arm_sp!, {regset, lr}
-
-	ldr	r7, [r2, #THREAD_STACK_SIZE]
-	ldr	r3, [r2, #THREAD_STACK_BASE]
+	mov	thread, r2	@ the thread pointer arrives in r2; keep it in the thread register
+	ldr	r7, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
 	rsb	r3, r7, r3
 	rsb	r3, r3, arm_sp
 	cmp	r3, #32768
 	blt	stack_overflow_no_frame
-
-	bl	fast_normal_entry
-
-	mov	r0, #0	@ deoptimized_frames = 0
-	ldmfd	arm_sp!, {regset, pc}
-
-	ALIGN_CODE
+	add	lr, pc, #(.normal_entry_return-(.normal_entry1+4))	@ replaces the old bl: pc reads as .normal_entry1+4 here, so lr = .normal_entry_return
+.normal_entry1:
+
+	FAST_ENTRY
 fast_normal_entry:
 	adrl	ip, dispatch_init_adcon
-	mov	tmp2, r0
+	mov	tmp1, r0
 	ldm	ip, {r0, r1}
-	mov	tmp1, r2
 	add	r0, r0, ip
-	ldr	stack, [tmp1, #THREAD_JAVA_SP]
+	ldr	stack, [thread, #THREAD_JAVA_SP]
 	add	dispatch, r1, r0
 
 	stmdb	arm_sp!, {fast_regset, lr}
 
-	ldrh	r0, [tmp2, #METHOD_MAXLOCALS]
+	ldrh	r0, [tmp1, #METHOD_MAXLOCALS]
 	mov	r1, #0
-	ldrh	r3, [tmp2, #METHOD_SIZEOFPARAMETERS]
+	ldrh	r3, [tmp1, #METHOD_SIZEOFPARAMETERS]
         mov     ip, #INTERPRETER_FRAME
-	ldrh	r2, [tmp2, #METHOD_MAXSTACK]
+	ldrh	r2, [tmp1, #METHOD_MAXSTACK]
         sub     r7, r0, r3
 
-	ldr	r3, [tmp1, #THREAD_JAVA_STACK_BASE]
+	ldr	r3, [thread, #THREAD_JAVA_STACK_BASE]
 	sub	r5, stack, r7, lsl #2
 	sub	r5, r5, #FRAME_SIZE+STACK_SPARE+LEAF_STACK_SIZE
 	sub	r5, r5, r2, lsl #2
@@ -3470,12 +3299,12 @@
         subs    r5, r5, #2
         bcs     1b
 3:
-	ldr	r3, [tmp1, #THREAD_TOP_ZERO_FRAME]
+	ldr	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 	mov	lr, #0
         sub     istate, stack, #FRAME_SIZE     // stack->push(INTERPRETER_FRAME);
         sub     r2, istate, r2, lsl #2
         str     lr, [istate, #ISTATE_MSG]
-	str	r2, [tmp1, #THREAD_JAVA_SP]
+	str	r2, [thread, #THREAD_JAVA_SP]
         sub     r5, r2, #4                      @ stack limit = istate - stackwords - 4
 	str	r3, [istate, #ISTATE_NEXT_FRAME]
 	str	ip, [istate, #ISTATE_FRAME_TYPE]
@@ -3486,24 +3315,24 @@
 	str	r1, [istate, #ISTATE_OOP_TEMP]
         add     locals, locals, r0, lsl #2
         sub     stack, istate, #4
-        ldr     jpc, [tmp2, #METHOD_CONSTMETHOD]
-        ldr     constpool, [tmp2, #METHOD_CONSTANTS]
+        ldr     jpc, [tmp1, #METHOD_CONSTMETHOD]
+        ldr     constpool, [tmp1, #METHOD_CONSTANTS]
         add     ip, istate, #ISTATE_NEXT_FRAME
 	DISPATCH_START	CONSTMETHOD_CODEOFFSET
         ldr     constpool, [constpool, #CONSTANTPOOL_CACHE]
-        str     ip, [tmp1, #THREAD_TOP_ZERO_FRAME]
-  USEC	ldr	r3, [r10, #METHOD_INVOCATIONCOUNTER]
+        str     ip, [thread, #THREAD_TOP_ZERO_FRAME]
+  USEC	ldr	r3, [tmp1, #METHOD_INVOCATIONCOUNTER]
 	mov	r1, #0
-        str     r1, [tmp1, #THREAD_LAST_JAVA_SP]
-        str     ip, [tmp1, #THREAD_LAST_JAVA_FP]
-	ldr	ip, [tmp1, #THREAD_JAVA_SP]
-        str     ip, [tmp1, #THREAD_LAST_JAVA_SP]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+        str     ip, [thread, #THREAD_LAST_JAVA_FP]
+	ldr	ip, [thread, #THREAD_JAVA_SP]
+        str     ip, [thread, #THREAD_LAST_JAVA_SP]
 	DISPATCH_NEXT
   USEC	ldr	lr, [dispatch, #InterpreterInvocationLimit_Address-XXX]
   USEC	add	r3, r3, #INVOCATIONCOUNTER_COUNTINCREMENT
-        str     tmp1, [istate, #ISTATE_THREAD]
+        str     thread, [istate, #ISTATE_THREAD]
   USEC	ldr	lr, [lr]
-  USEC	str	r3, [tmp2, #METHOD_INVOCATIONCOUNTER]
+  USEC	str	r3, [tmp1, #METHOD_INVOCATIONCOUNTER]
 	str	locals, [istate, #ISTATE_LOCALS]
   USEC	cmp	r3, lr
 	str	constpool, [istate, #ISTATE_CONSTANTS]
@@ -3511,7 +3340,7 @@
 	DISPATCH_NEXT
 	DISPATCH_NEXT
 	DISPATCH_NEXT
-	str	r10, [istate, #ISTATE_METHOD]
+	str	tmp1, [istate, #ISTATE_METHOD]
  	str	istate, [istate, #ISTATE_SELF_LINK]
 @	mov	lr, #0
 @        str     lr, [istate, #ISTATE_PREV_LINK]
@@ -3519,20 +3348,20 @@
 	DISPATCH_FINISH
 #ifdef USE_COMPILER
 method_entry_freq_count_overflow:
-        ldr     r3, [r10, #METHOD_CONSTMETHOD]
+        ldr     r3, [tmp1, #METHOD_CONSTMETHOD]
 	DECACHE_JPC
         ldrh    r3, [r3, #CONSTMETHOD_CODESIZE]
-	str	r10, [istate, #ISTATE_METHOD]
+	str	tmp1, [istate, #ISTATE_METHOD]
 	mov	r1, #0
-	mov	r0, tmp1
+	mov	r0, thread
         cmp     r3, #MAX_FG_METHOD_SIZE
         bcc     1f
-        ldr     tmp2, [dispatch, #BackgroundCompilation_Address-XXX]
+        ldr     tmp1, [dispatch, #BackgroundCompilation_Address-XXX]
         mov     r3, #1
-        ldr     r5, [tmp2]
-        str     r3, [tmp2]
+        ldr     r5, [tmp1]
+        str     r3, [tmp1]
         bl      FREQ_COUNT_OVERFLOW
-        str     r5, [tmp2]
+        str     r5, [tmp1]
         b       2f
 1:
 	bl	FREQ_COUNT_OVERFLOW
@@ -3543,17 +3372,36 @@
 	CACHE_CP
 	DISPATCH	0
 
-#ifdef THUMB2EE
-#define THUMB2_MAXLOCALS 1000
+#ifdef T2JIT
+
+#define JAZ_V1	r5
+#define JAZ_V2	r6
+#define JAZ_V3	r7
+#define JAZ_V4	r8
+#define	JAZ_V5	r9
+#define	JAZ_V6	r11
+
+#define JAZ_REGSET	JAZ_V1,JAZ_V2,JAZ_V3,JAZ_V4,JAZ_V5,JAZ_V6	/* ascending register numbers, so slot i of a pushed block is JAZ_V(i+1) */
+#define JAZ_REGSET_LEN	6	/* number of registers in JAZ_REGSET */
+
 call_thumb2:
 	str	istate, [istate, #ISTATE_SELF_LINK]
-	mov	ip, r1
-	sub	r1, locals, #THUMB2_MAXLOCALS * 4
-	ldr	r2, [istate, #ISTATE_THREAD]
-	sub	r3, locals, #31 * 4
+	stmdb	sp!, {JAZ_REGSET}	@ push the current JAZ values; selected slots are overwritten below
+	mov	ip, #0	@ ip = slot index, 0 .. JAZ_REGSET_LEN-1
+3:
+	ldrsh	r3, [r1], #2	@ next signed 16-bit table entry; r1 advances by 2
+	cmp	r3, #-1	@ an entry of -1 leaves this slot as pushed
+	ldrne	r3, [locals, -r3, lsl #2]	@ otherwise fetch the word at locals - entry*4
+	strne	r3, [sp, ip, lsl #2]	@ and place it in slot ip of the pushed block
+	add	ip, ip, #1
+	cmp	ip, #JAZ_REGSET_LEN
+	bne	3b
+
+	ldmia	sp!, {JAZ_REGSET}	@ pop the block: registers receive the loaded words or keep their old values
+1:
 	add	stack, stack, #4
-	bx	ip
-#endif // THUMB2EE
+	bx r0	@ jump to the address in r0 (the old code jumped to the address passed in r1)
+#endif // T2JIT
 
 #endif // USE_COMPILER
 	.global	Thumb2_Install
@@ -3564,25 +3412,24 @@
 	bx	lr
 
 handle_return:
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
-
-	cmp	tmp1, tmp2
+
+	cmp	tmp1, r9
 	blcc	return_check_monitors
 
 	mov	r3, #0
 	ldrb	lr, [jpc, #0]
 
-	ldr	r2, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_FP]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
+	str	r3, [thread, #THREAD_LAST_JAVA_FP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [r2, #0]
 	ldrh	r0, [r0, #40]
 	add	r1, r2, #4
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 
 	add	r1, r1, r0, lsl #2
 
@@ -3596,14 +3443,14 @@
 	ldreq	r0, [stack, #4]
 	streq	r0, [r1, #-4]!
 
-	str	r1, [tmp_xxx, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
 
 @ ----------------------------------------------------------------------------------------
 stack_overflow_no_frame:
-	mov	r0, tmp1
+	mov	r0, thread
 	mov	ip, #0
 	str	ip, [r0, #THREAD_LAST_JAVA_SP]
 	ldr	ip, [r0, #THREAD_TOP_ZERO_FRAME]
@@ -3614,7 +3461,7 @@
 	ldmfd	arm_sp!, {regset, pc}
 
 stack_overflow_before_frame:
-	mov	r0, tmp1
+	mov	r0, thread
 	mov	ip, #0
 	str	ip, [r0, #THREAD_LAST_JAVA_SP]
 	ldr	ip, [r0, #THREAD_TOP_ZERO_FRAME]
@@ -3626,7 +3473,7 @@
 
 handle_exception_do_not_unlock:
 	mov	r3, #1
-	strb	r3, [r2, #THREAD_DO_NOT_UNLOCK]
+	strb	r3, [thread, #THREAD_DO_NOT_UNLOCK]
 	b	handle_exception_with_bcp
 
 abstractmethod_exception:
@@ -3645,7 +3492,7 @@
 	mov	r2, r1
 	ldr	r1, [dispatch, #VmSymbols_symbols_Address-XXX]
 	ldr	r1, [r1, r0, lsl #2]
-        ldr     r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	Helper_Raise
         b       handle_exception_with_bcp
 null_str:
@@ -3678,7 +3525,7 @@
 	beq	1f
 
 	mov	r0, istate
-	ldr	r1, [istate, #ISTATE_THREAD]
+	mov	r1, thread
 	bl	Helper_HandleException
 	cmp	r0, #0
 	beq	1f
@@ -3689,34 +3536,33 @@
 	CACHE_CP
 	DISPATCH 0
 1:
-	ldr	tmp2, [istate, #ISTATE_MONITOR_BASE]	@ tmp2 = base
+	ldr	r9, [istate, #ISTATE_MONITOR_BASE]	@ r9 = base
 
 	ldr	tmp1, [istate, #ISTATE_STACK_BASE]	@ tmp1 = end
-	ldr	tmp_xxx, [istate, #ISTATE_THREAD]
 
 	mov	r3, #0
-	ldrb	r0, [tmp_xxx, #THREAD_DO_NOT_UNLOCK]
-	strb	r3, [tmp_xxx, #THREAD_DO_NOT_UNLOCK]
+	ldrb	r0, [thread, #THREAD_DO_NOT_UNLOCK]
+	strb	r3, [thread, #THREAD_DO_NOT_UNLOCK]
 	cmp	r0, #0
 	bne	2f
 
-	cmp	tmp1, tmp2
+	cmp	tmp1, r9
 	blcc	return_check_monitors
 
 2:
 	mov	r3, #0
 
-	ldr	r2, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
-	str	r3, [tmp_xxx, #THREAD_LAST_JAVA_SP]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]
 	ldr	r0, [istate, #ISTATE_METHOD]
 	ldr	r3, [r2, #0]
 	ldrh	r0, [r0, #40]
 	add	r1, r2, #4
-	str	r3, [tmp_xxx, #THREAD_TOP_ZERO_FRAME]
+	str	r3, [thread, #THREAD_TOP_ZERO_FRAME]
 
 	add	r1, r1, r0, lsl #2
 
-	str	r1, [tmp_xxx, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_JAVA_SP]
 
 	mov	r0, #0	@ deoptimized_frames = 0
 	ldmfd	arm_sp!, {fast_regset, pc}
@@ -3724,19 +3570,19 @@
 return_check_monitors:
 	stmdb	arm_sp!, {r4, lr}
 
-	DECACHE_JPC
+	DECACHE_JPC	// FIXME: May not be needed.
 	ldr	r2, [istate, #ISTATE_METHOD]
 	ldr	r4, [r2, #METHOD_ACCESSFLAGS]
 	tst	r4, #1<<5
-	subne	tmp2, tmp2, #8
-	cmp	tmp1, tmp2
+	subne	r9, r9, #8
+	cmp	tmp1, r9
 	bcs	2f
 1:
 	ldr	r3, [tmp1, #4]
 	cmp	r3, #0
 	bne	3f
 	add	tmp1, tmp1, #8
-	cmp	tmp1, tmp2
+	cmp	tmp1, r9
 	bcc	1b
 
 2:
@@ -3744,48 +3590,57 @@
 
 	ldmeqia	arm_sp!, {r4, pc}
 
-	ldr	tmp1, [tmp2, #4]		@ base->obj == NULL
+	ldr	tmp1, [r9, #4]		@ base->obj == NULL
 	cmp	tmp1, #0
 	beq	4f
 
-	ldr	r0, [tmp2, #0]			@ r0 = header
+	ldr	r0, [r9, #0]			@ r0 = header
 	mov	r3, #0
 	cmp	r0, #0
-	str	r3, [tmp2, #4]			@ base->obj = NULL
+	str	r3, [r9, #4]			@ base->obj = NULL
 
 	ldmeqia	arm_sp!, {r4, pc}
 
 	mov	r1, tmp1
-	mov	r2, tmp2
+	mov	r2, r9
 	bl	cmpxchg_ptr
-	cmp	tmp2, r0
+	cmp	r9, r0
 
 	ldmeqia	arm_sp!, {r4, pc}
 
-	str	tmp1, [tmp2, #4]
-
-	mov	r1, tmp2
-	ldr	r0, [istate, #ISTATE_THREAD]
+	str	tmp1, [r9, #4]
+
+	mov	r1, r9
+	mov	r0, thread
 	bl	Helper_synchronized_exit
 
 	ldmeqia	arm_sp!, {r4, pc}
 
 3:
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	Helper_RaiseIllegalMonitorException
 	b	2b
 
 4:
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	Helper_RaiseIllegalMonitorException
 	ldmia	arm_sp!, {r4, pc}
 
-	ALIGN_CODE
+	SLOW_ENTRY
 accessor_entry:
-	b	slow_accessor_entry
-
-	ALIGN_CODE
-slow_accessor_entry:
+	stmfd	arm_sp!, {regset, lr}
+	mov	thread, r2
+	ldr	r7, [thread, #THREAD_STACK_SIZE]
+	ldr	r3, [thread, #THREAD_STACK_BASE]
+	rsb	r3, r7, r3
+	rsb	r3, r3, arm_sp
+	cmp	r3, #32768
+	blt	stack_overflow_no_frame
+	bl	fast_accessor_entry
+	ldmia	sp!, {regset, pc}
+
+	FAST_ENTRY
+fast_accessor_entry:
   USEC	adrl	ip, dispatch_init_adcon
   USEC	ldr	r3, [ip]
   USEC	add	r3, r3, ip
@@ -3797,10 +3652,9 @@
   USEC	add	r3, r3, #INVOCATIONCOUNTER_COUNTINCREMENT
   USEC	str	r3, [r0, #METHOD_INVOCATIONCOUNTER]
   USEC	cmp	r3, ip
-  USEC	bcs	normal_entry
+  USEC	bcs	fast_normal_entry
 
 	ldr	r1, [r0, #METHOD_CONSTMETHOD]
-
 	ldrb	r3, [r1, #CONSTMETHOD_CODEOFFSET+2]
 	ldrb	r1, [r1, #CONSTMETHOD_CODEOFFSET+3]
 	ldr	ip, [r0, #METHOD_CONSTANTS]
@@ -3809,14 +3663,14 @@
 
 	add	r1, ip, #CP_OFFSET
 	ldr	r3, [r1, r3, lsl #4]!		@ r1 = cache, r3 = flags
-	ldr	ip, [r2, #THREAD_JAVA_SP]			@ ip == stack
+	ldr	ip, [thread, #THREAD_JAVA_SP]			@ ip == stack
 	and	r3, r3, #0x00ff0000
 	cmp	r3, #opc_getfield << 16
 	ldr	r3, [ip, #0]
-	bne	normal_entry
+	bne	fast_normal_entry
 
 	cmp	r3, #0
-	beq	normal_entry
+	beq	fast_normal_entry
 
 	ldr	r0, [r1, #12]
 	ldr	r1, [r1, #8]
@@ -3828,46 +3682,6 @@
 	mov	r0, #0
 	bx	lr
 
-.fast_accessor_non_w:
-	bcs	.fast_accessor_h
-	beq	.fast_accessor_sb
-	tst	lr, #2
-	bne	.fast_accessor_dw
-.fast_accessor_sh:
-	DISPATCH_STATE	2
-	DISPATCH_NEXT
-	ldrsh	tmp1, [r2, r3]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUT_STACK	0, tmp1
-	DISPATCH_FINISH
-.fast_accessor_h:
-	DISPATCH_STATE	2
-	DISPATCH_NEXT
-	ldrh	tmp1, [r2, r3]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUT_STACK	0, tmp1
-	DISPATCH_FINISH
-.fast_accessor_sb:
-	DISPATCH_STATE	2
-	DISPATCH_NEXT
-	ldrsb	tmp1, [r2, r3]
-	DISPATCH_NEXT
-	DISPATCH_NEXT
-	PUT_STACK	0, tmp1
-	DISPATCH_FINISH
-.fast_accessor_dw:
-	DISPATCH_STATE	2
-	add	tmp1, r2, r3
-	DISPATCH_NEXT
-	ldm	tmp1, {tmp2, tmp1}
-	DISPATCH_NEXT
-	PUT_STACK	0, tmp1
-	DISPATCH_NEXT
-	PUSH	tmp2
-	DISPATCH_FINISH
-
 div_zero_jpc_1:
 	sub	jpc, jpc, #1
 .lrem_0:
@@ -3891,10 +3705,10 @@
 array_bound_exception_jpc_0_r3:
 	mov	r2, r3
 	b	array_bounds_exception
-array_bound_exception_jpc_1_tmp2:
+array_bound_exception_jpc_1_tmp1:
 	sub	jpc, jpc, #1
-array_bound_exception_jpc_0_tmp2:
-	mov	r2, tmp2
+array_bound_exception_jpc_0_tmp1:
+	mov	r2, tmp1
 	b	array_bounds_exception
 array_bound_exception_jpc_3:
 	sub	jpc, jpc, #1
@@ -3907,7 +3721,7 @@
 	DECACHE_JPC
         DECACHE_STACK
 	mov	r1, r2
-        ldr     r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	bl	Helper_RaiseArrayBoundException
         b       handle_exception_with_bcp
 
@@ -4840,7 +4654,7 @@
 
 #endif // USE_COMPILER
 
-#ifdef THUMB2EE
+#ifdef T2JIT
 	bl	Thumb2_Initialize
 #endif
 
@@ -5512,23 +5326,28 @@
 	.word	jrem_31
 	.word	jrem_32
 
-#ifdef THUMB2EE
+#ifdef T2JIT
+
+	.macro	LOAD_FRAME
+	ldr	Rframe, [thread, #THREAD_TOP_ZERO_FRAME]	@ Rframe = top zero frame recorded in the thread
+	.endm
+
 @ R0 = BCI
 @ R1 = index
-#define Rthread	r9
+
 	.global	Thumb2_invokeinterface_stub
 	.type Thumb2_invokeinterface_stub, %function
 Thumb2_invokeinterface_stub:
+	LOAD_FRAME
 	stmdb	sp!, {ip, lr}
-	ldr	ip, [istate, #ISTATE_METHOD]
+	ldr	ip, [Rframe, #FRAME_METHOD]
 	sub	stack, stack, #4
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [Rframe, #FRAME_CONSTANTS]
 	ldr	ip, [ip, #METHOD_CONSTMETHOD]
-	DECACHE_STACK
+	str	stack, [Rframe, #FRAME_STACK]
 	add	jpc, ip, r0
-
         add     r0, r2, r1, lsl #4
-	DECACHE_JPC
+	str	jpc, [Rframe, #FRAME_BCP]
 
         ldr     r2, [r0, #CP_OFFSET]
         and     r2, r2, #0x00ff0000
@@ -5540,15 +5359,15 @@
 	ldr	r2, [stack, r2, lsl #2]
 	cmp	r2, #0
 	beq	istub_null_ptr_exception
-	ldr	tmp2, [r2, #4]				@ rcvr->klass()
+	ldr	tmp1, [r2, #4]				@ rcvr->klass()
 	tst	r3, #flag_methodInterface
 	bne	istub_methodInterface
 
 	ldr	lr, [r0, #CP_OFFSET+4]			@ lr = iclass
 
-	add	r1, tmp2, #INSTANCEKLASS_VTABLE_OFFSET
-	ldr	r2, [tmp2, #KLASS_PART+INSTANCEKLASS_VTABLE_LEN]
-	ldr	ip, [tmp2, #KLASS_PART+INSTANCEKLASS_ITABLE_LEN]
+	add	r1, tmp1, #INSTANCEKLASS_VTABLE_OFFSET
+	ldr	r2, [tmp1, #KLASS_PART+INSTANCEKLASS_VTABLE_LEN]
+	ldr	ip, [tmp1, #KLASS_PART+INSTANCEKLASS_ITABLE_LEN]
 	add	r2, r2, #1
 	bic	r2, r2, #1
 
@@ -5565,72 +5384,71 @@
 
 	ldr	r3, [r0, #CP_OFFSET+8]
 	ldr	r2, [r1, #-4]
-	add	r3, tmp2, r3, lsl #2
-	ldr	tmp2, [r3, r2]
-	cmp	tmp2, #0
+	add	r3, tmp1, r3, lsl #2
+	ldr	tmp1, [r3, r2]
+	cmp	tmp1, #0
 	beq	istub_abstractmethod_exception
 istub_invoke:
-	ldr	ip, [tmp2, #METHOD_FROM_INTERPRETED]
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]	@ tmp1 holds the callee method
 	mov	r1, #0
-	str	ip, [istate, #36]
-	str	r1, [Rthread, #THREAD_LAST_JAVA_SP]
-	str	r1, [Rthread, #THREAD_LAST_JAVA_FP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]	@ NOTE(review): the LAST_JAVA_SP clear was dropped here but kept in the sibling call sequences; confirm this is intended
 
 	add	stack, stack, #4
-	str	stack, [Rthread, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	ldr	r3, [ip]
 
-	mov	r0, tmp2
-	mov	r1, ip
-#ifndef SHARK
-	add	r3, r3, #CODE_ALIGN_SIZE
+	mov	r0, tmp1	@ r0 = callee method
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET	@ call the entry at FAST_ENTRY_OFFSET past the loaded address
 #endif
-	mov	r2, Rthread
 	blx	r3
 
-	ldr	Rthread, [istate, #ISTATE_THREAD]
-
-	ldr	stack, [Rthread, #THREAD_JAVA_SP]
-	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
-
-	ldr	r1, [Rthread, #THREAD_TOP_ZERO_FRAME]
+	LOAD_FRAME
+
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	ldr	r2, [Rframe, #FRAME_STACK_LIMIT]
+
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
 	add	r2, r2, #4
 	mov	r0, #0
-	str	r0, [Rthread, #THREAD_LAST_JAVA_SP]
-	str	r2, [Rthread, #THREAD_JAVA_SP]
-	str	r1, [Rthread, #THREAD_LAST_JAVA_FP]
-	str	r2, [Rthread, #THREAD_LAST_JAVA_SP]
-	ldr	r3, [Rthread, #4]
+	str	r0, [thread, #THREAD_LAST_JAVA_SP]
+	str	r2, [thread, #THREAD_JAVA_SP]	@ JAVA_SP = stack limit + 4
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]	@ named offset, as in the interpreter return paths (was magic #4)
 	cmp	r3, #0
 	bne	istub_exception
 	ldmia	sp!, {ip, pc}
 
 istub_methodInterface:
 	tst	r3, #flag_vfinalMethod
-	ldrne	tmp2, [r0, #CP_OFFSET+8]
+	ldrne	tmp1, [r0, #CP_OFFSET+8]
 	bne	istub_invoke
 	ldr	r1, [r0, #CP_OFFSET+8]
-	add	r3, tmp2, r1, lsl #2
-	ldr	tmp2, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+	add	r3, tmp1, r1, lsl #2
+	ldr	tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
 	b	istub_invoke
 
 istub_resolve:
-	mov	tmp2, r1
+	mov	tmp1, r1	@ keep the index in tmp1 across the call
 	mov	r1, #opc_invokeinterface
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	ldr	ip, resolve_invoke_adcon
 	blx	ip
-	ldr	r3, [Rthread, #4]
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]	@ named offset instead of magic #4
+	ldr	r2, [Rframe, #FRAME_CONSTANTS]	@ NOTE(review): Rframe is read after the call with no LOAD_FRAME; confirm the call preserves it
 	cmp	r3, #0
 	bne	istub_exception
-	add	r0, r2, tmp2, lsl #4	@ r1 = cache
+	add	r0, r2, tmp1, lsl #4	@ r0 = cache entry (constants + index*16)
 	b	2b
 
 istub_exception:
 	ldmia	sp!, {ip, lr}
 	ldr	ip, handle_exception_adcon
+	LOAD_ISTATE
 	bx	ip
 
 istub_null_ptr_exception:
@@ -5642,9 +5460,10 @@
 istub_incompatibleclass_exception:
 	mov	r0, #VMSYMBOLS_IncompatibleClassChangeError
 3:
-	CACHE_JPC
+	ldr	jpc, [Rframe, #FRAME_BCP]
 	ldmia	sp!, {ip, lr}
 	ldr	ip, raise_exception_adcon
+	LOAD_ISTATE
 	bx	ip
 
 resolve_invoke_adcon:
@@ -5657,22 +5476,24 @@
 	.word	raise_exception
 helper_aputfield_adcon:
 	.word	Helper_aputfield
+lr_to_bci_adcon:
+	.word	Thumb2_lr_to_bci
 
 @ R0 = BCI
 @ R1 = index
 	.global	Thumb2_invokevirtual_stub
 	.type Thumb2_invokevirtual_stub, %function
 Thumb2_invokevirtual_stub:
+	LOAD_FRAME
 	stmdb	sp!, {ip, lr}
-        ldr     ip, [istate, #ISTATE_METHOD]
+        ldr     ip, [Rframe, #FRAME_METHOD]
         sub     stack, stack, #4
-        ldr     r2, [istate, #ISTATE_CONSTANTS]
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
         ldr     ip, [ip, #METHOD_CONSTMETHOD]
-        DECACHE_STACK
+	str	stack, [Rframe, #FRAME_STACK]
         add     jpc, ip, r0
-
         add     r0, r2, r1, lsl #4
-        DECACHE_JPC
+	str	jpc, [Rframe, #FRAME_BCP]
 
         ldr     r2, [r0, #CP_OFFSET]
         and     r2, r2, #0xff000000
@@ -5686,152 +5507,304 @@
         cmp     r2, #0
         beq     istub_null_ptr_exception
 
-        ldr     tmp2, [r0, #CP_OFFSET+8]
+        ldr     tmp1, [r0, #CP_OFFSET+8]
         tst     r3, #flag_vfinalMethod
         bne     1f
 
         ldr     r3, [r2, #4]
-        add     r3, r3, tmp2, lsl #2
-        ldr     tmp2, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]
 1:
 	mov	r1, #0
-        ldr     ip, [tmp2, #METHOD_FROM_INTERPRETED]
-        str     r1, [Rthread, #THREAD_LAST_JAVA_SP]
-        str     r1, [Rthread, #THREAD_LAST_JAVA_FP]
-        str     ip, [istate, #36]
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
 
         add     stack, stack, #4
-        str     stack, [Rthread, #THREAD_JAVA_SP]
+        str     stack, [thread, #THREAD_JAVA_SP]
 
         ldr     r3, [ip, #0]
 
-	mov	r0, tmp2
-	mov	r1, ip
-#ifndef SHARK
-	add	r3, r3, #CODE_ALIGN_SIZE
+	mov	r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+	add	r3, r3, #FAST_ENTRY_OFFSET
 #endif
-	mov	r2, Rthread
 	blx	r3
-
-        ldr     Rthread, [istate, #ISTATE_THREAD]
-
-	ldr	stack, [Rthread, #THREAD_JAVA_SP]
-	ldr	r2, [istate, #ISTATE_STACK_LIMIT]
+	LOAD_FRAME
+
+	ldr	stack, [thread, #THREAD_JAVA_SP]
+	ldr	r2, [Rframe, #FRAME_STACK_LIMIT]
 
 	mov	r0, #0
-	str	r0, [Rthread, #THREAD_LAST_JAVA_SP]
-	ldr	r1, [Rthread, #THREAD_TOP_ZERO_FRAME]
+	str	r0, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r1, [thread, #THREAD_TOP_ZERO_FRAME]
 	add	r2, r2, #4
-	str	r2, [Rthread, #THREAD_JAVA_SP]
-	str	r1, [Rthread, #THREAD_LAST_JAVA_FP]
-	str	r2, [Rthread, #THREAD_LAST_JAVA_SP]
-	ldr	r3, [Rthread, #4]
+	str	r2, [thread, #THREAD_JAVA_SP]
+	str	r1, [thread, #THREAD_LAST_JAVA_FP]
+	str	r2, [thread, #THREAD_LAST_JAVA_SP]
+	ldr	r3, [thread, #4]
 	cmp	r3, #0
 	bne	istub_exception
 	ldmia	sp!, {ip, pc}
 
 ivstub_resolve:
-	mov	tmp2, r1
+	mov	tmp1, r1	@ keep the index in tmp1 across the call
 	mov	r1, #opc_invokevirtual
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	ldr	ip, resolve_invoke_adcon
 	blx	ip
-	ldr	r3, [Rthread, #4]
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r3, [thread, #THREAD_PENDING_EXC]	@ named offset instead of magic #4
+	ldr	r2, [Rframe, #FRAME_CONSTANTS]	@ NOTE(review): Rframe is read after the call with no LOAD_FRAME; confirm the call preserves it
 	cmp	r3, #0
 	bne	istub_exception
-	add	r0, r2, tmp2, lsl #4	@ r1 = cache
+	add	r0, r2, tmp1, lsl #4	@ r0 = cache entry (constants + index*16)
 	b	2b
 
 @ R0 = BCI
 @ R1 = index
+	.global	Thumb2_invokevfinalresolved_stub
+Thumb2_invokevfinalresolved_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}	@ popped into ip, pc by the ldmia at the end
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0	@ jpc = constMethod + BCI (r0)
+
+        add     r0, r2, r1, lsl #4	@ r0 = constants + index*16
+        DECACHE_JPC_USING_FRAME
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r2, r3, #255	@ low byte selects the stack slot checked below
+        ldr     r2, [stack, r2, asl #2]
+        cmp     r2, #0
+        beq     istub_null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+8]	@ callee method comes straight from the cache entry; no vtable lookup
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1	@ r0 = callee method
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        add     r2, r2, #4
+        str     r2, [thread, #THREAD_JAVA_SP]	@ JAVA_SP = stack limit + 4
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP]
+        ldr     r3, [thread, #THREAD_PENDING_EXC]	@ named offset instead of magic #4
+        cmp     r3, #0
+        bne     istub_exception
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
+	.global	Thumb2_invokevirtualresolved_stub
+Thumb2_invokevirtualresolved_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}	@ popped into ip, pc by the ldmia at the end
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0	@ jpc = constMethod + BCI (r0)
+
+        add     r0, r2, r1, lsl #4	@ r0 = constants + index*16
+        DECACHE_JPC_USING_FRAME
+
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r2, r3, #255	@ low byte selects the stack slot holding the receiver
+        ldr     r2, [stack, r2, asl #2]
+        cmp     r2, #0
+        beq     istub_null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+8]	@ index taken from the cache entry
+        ldr     r3, [r2, #4]	@ word at receiver + 4
+        add     r3, r3, tmp1, lsl #2
+        ldr     tmp1, [r3, #INSTANCEKLASS_VTABLE_OFFSET]	@ tmp1 = callee method fetched through the vtable
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1	@ r0 = callee method
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        add     r2, r2, #4
+        str     r2, [thread, #THREAD_JAVA_SP]	@ JAVA_SP = stack limit + 4
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP]
+        ldr     r3, [thread, #THREAD_PENDING_EXC]	@ named offset instead of magic #4
+        cmp     r3, #0
+        bne     istub_exception
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
 	.global	Thumb2_invokestatic_stub
 	.type Thumb2_invokestatic_stub, %function
 Thumb2_invokestatic_stub:
+	LOAD_FRAME
         stmdb   sp!, {ip, lr}
-        ldr     ip, [istate, #ISTATE_METHOD]
+        ldr     ip, [Rframe, #FRAME_METHOD]
         sub     stack, stack, #4
-        ldr     r2, [istate, #ISTATE_CONSTANTS]
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
         ldr     ip, [ip, #METHOD_CONSTMETHOD]
-        DECACHE_STACK
+        DECACHE_STACK_USING_FRAME
         add     jpc, ip, r0
 
         add     r0, r2, r1, lsl #4
-        DECACHE_JPC
+        DECACHE_JPC_USING_FRAME
 
         ldr     r2, [r0, #CP_OFFSET]
 	and	r2, r2, #0x00ff0000
 	cmp	r2, #opc_invokestatic << 16
 	bne	isstub_resolve
 2:
-	ldr	tmp2, [r0, #CP_OFFSET+4]
+	ldr	tmp1, [r0, #CP_OFFSET+4]
 	mov	r1, #0
-	ldr	ip, [tmp2, #METHOD_FROM_INTERPRETED]
-	str	r1, [Rthread, #THREAD_LAST_JAVA_SP]
-	str	ip, [istate, #36]
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
 
 	add	stack, stack, #4
-	str	stack, [Rthread, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	ldr	r3, [ip, #0]
 
-        mov     r0, tmp2
-        mov     r1, ip
-#ifndef SHARK
-        add     r3, r3, #CODE_ALIGN_SIZE
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
 #endif
-        mov     r2, Rthread
         blx     r3
-
-        ldr     Rthread, [istate, #ISTATE_THREAD]
-
-        ldr     stack, [Rthread, #THREAD_JAVA_SP]
-        ldr     r2, [istate, #ISTATE_STACK_LIMIT]
-
-        ldr     r1, [Rthread, #THREAD_TOP_ZERO_FRAME]
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
         add     r2, r2, #4
 	mov	r3, #0
-	str	r3, [Rthread, #THREAD_LAST_JAVA_SP]	
-        str     r1, [Rthread, #THREAD_LAST_JAVA_FP]
-        str     r2, [Rthread, #THREAD_JAVA_SP]
-        str     r2, [Rthread, #THREAD_LAST_JAVA_SP]
-        ldr     r3, [Rthread, #4]
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #4]
         cmp     r3, #0
         bne     istub_exception
 	mov	r0, #0
         ldmia   sp!, {ip, pc}
 
 isstub_resolve:
-        mov     tmp2, r1
+        mov     tmp1, r1	@ keep the index in tmp1 across the call
         mov     r1, #opc_invokestatic
-        ldr     r0, [istate, #ISTATE_THREAD]
+        mov	r0, thread
         ldr     ip, resolve_invoke_adcon
         blx     ip
-        ldr     r3, [Rthread, #4]
-        ldr     r2, [istate, #ISTATE_CONSTANTS]
+        ldr     r3, [thread, #THREAD_PENDING_EXC]	@ named offset instead of magic #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]	@ NOTE(review): Rframe is read after the call with no LOAD_FRAME; confirm the call preserves it
         cmp     r3, #0
         bne     istub_exception
-        add     r0, r2, tmp2, lsl #4    @ r1 = cache
+        add     r0, r2, tmp1, lsl #4    @ r0 = cache entry (constants + index*16)
         b       2b
 
 @ R0 = BCI
 @ R1 = index
+	.global	Thumb2_invokestaticresolved_stub
+Thumb2_invokestaticresolved_stub:
+	LOAD_FRAME
+	stmdb	sp!, {ip, lr}	@ popped into ip, pc by the ldmia at the end
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0	@ jpc = constMethod + BCI (r0)
+        add     r0, r2, r1, lsl #4	@ r0 = constants + index*16
+        DECACHE_JPC_USING_FRAME
+        ldr     tmp1, [r0, #CP_OFFSET+4]	@ callee method from the cache entry; no resolution check in this variant
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1	@ r0 = callee method
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
+        add     r2, r2, #4
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #THREAD_PENDING_EXC]	@ named offset instead of magic #4
+        cmp     r3, #0
+        bne     istub_exception
+	mov	r0, #0
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
 	.global	Thumb2_invokespecial_stub
 	.type Thumb2_invokespecial_stub, %function
 Thumb2_invokespecial_stub:
+	LOAD_FRAME
         stmdb   sp!, {ip, lr}
-        ldr     ip, [istate, #ISTATE_METHOD]
+        ldr     ip, [Rframe, #FRAME_METHOD]
         sub     stack, stack, #4
-        ldr     r2, [istate, #ISTATE_CONSTANTS]
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
         ldr     ip, [ip, #METHOD_CONSTMETHOD]
-        DECACHE_STACK
+        DECACHE_STACK_USING_FRAME
         add     jpc, ip, r0
 
         add     r0, r2, r1, lsl #4
-        DECACHE_JPC
-
+        DECACHE_JPC_USING_FRAME
+ 
         ldr     r2, [r0, #CP_OFFSET]
-	and	r2, r2, #0x00ff0000
+ 	and	r2, r2, #0x00ff0000
 	cmp	r2, #opc_invokespecial << 16
 	bne	ispstub_resolve
 2:
@@ -5841,59 +5814,118 @@
 	cmp	r2, #0
 	beq	istub_null_ptr_exception
 
-	ldr	tmp2, [r0, #CP_OFFSET+4]
+	ldr	tmp1, [r0, #CP_OFFSET+4]
 	mov	r1, #0
-	ldr	ip, [tmp2, #METHOD_FROM_INTERPRETED]
-	str	r1, [Rthread, #THREAD_LAST_JAVA_SP]
-	str	ip, [istate, #36]
+	ldr	ip, [tmp1, #METHOD_FROM_INTERPRETED]
+	str	r1, [thread, #THREAD_LAST_JAVA_SP]
 
 	add	stack, stack, #4
-	str	stack, [Rthread, #THREAD_JAVA_SP]
+	str	stack, [thread, #THREAD_JAVA_SP]
 
 	ldr	r3, [ip, #0]
 
-        mov     r0, tmp2
-        mov     r1, ip
-#ifndef SHARK
-        add     r3, r3, #CODE_ALIGN_SIZE
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
 #endif
-        mov     r2, Rthread
+        mov     r2, thread
         blx     r3
-
-        ldr     Rthread, [istate, #ISTATE_THREAD]
-
-        ldr     stack, [Rthread, #THREAD_JAVA_SP]
-        ldr     r2, [istate, #ISTATE_STACK_LIMIT]
-
-        ldr     r1, [Rthread, #THREAD_TOP_ZERO_FRAME]
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
         add     r2, r2, #4
-        str     r2, [Rthread, #THREAD_JAVA_SP]
-        str     r2, [Rthread, #THREAD_LAST_JAVA_SP]
-        str     r1, [Rthread, #THREAD_LAST_JAVA_FP]
-        ldr     r3, [Rthread, #4]
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #4]
         cmp     r3, #0
         bne     istub_exception
+	mov	r0, #0
         ldmia   sp!, {ip, pc}
 
 ispstub_resolve:
-        mov     tmp2, r1
+        mov     tmp1, r1
         mov     r1, #opc_invokespecial
-        ldr     r0, [istate, #ISTATE_THREAD]
+        mov	r0, thread
         ldr     ip, resolve_invoke_adcon
         blx     ip
-        ldr     r3, [Rthread, #4]
-        ldr     r2, [istate, #ISTATE_CONSTANTS]
+        ldr     r3, [thread, #4]
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
         cmp     r3, #0
         bne     istub_exception
-        add     r0, r2, tmp2, lsl #4    @ r1 = cache
+        add     r0, r2, tmp1, lsl #4    @ r0 = cache
         b       2b
 
 @ R0 = BCI
 @ R1 = index
+	.global	Thumb2_invokespecialresolved_stub
+Thumb2_invokespecialresolved_stub:
+	LOAD_FRAME
+        stmdb   sp!, {ip, lr}
+        ldr     ip, [Rframe, #FRAME_METHOD]
+        sub     stack, stack, #4
+        ldr     r2, [Rframe, #FRAME_CONSTANTS]
+        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+        DECACHE_STACK_USING_FRAME
+        add     jpc, ip, r0
+
+        add     r0, r2, r1, lsl #4
+        DECACHE_JPC_USING_FRAME
+        ldr     r3, [r0, #CP_OFFSET+12]
+        and     r3, r3, #255
+        ldr     r2, [stack, r3, asl #2]
+        cmp     r2, #0
+        beq     istub_null_ptr_exception
+
+        ldr     tmp1, [r0, #CP_OFFSET+4]
+        mov     r1, #0
+        ldr     ip, [tmp1, #METHOD_FROM_INTERPRETED]
+        str     r1, [thread, #THREAD_LAST_JAVA_SP]
+
+        add     stack, stack, #4
+        str     stack, [thread, #THREAD_JAVA_SP]
+
+        ldr     r3, [ip, #0]
+
+        mov     r0, tmp1
+#ifdef SHARK
+	mov	r2, thread
+#else
+        add     r3, r3, #FAST_ENTRY_OFFSET
+#endif
+        blx     r3
+	LOAD_FRAME
+
+        ldr     stack, [thread, #THREAD_JAVA_SP]
+        ldr     r2, [Rframe, #FRAME_STACK_LIMIT]
+
+        ldr     r1, [thread, #THREAD_TOP_ZERO_FRAME]
+        add     r2, r2, #4
+	mov	r3, #0
+	str	r3, [thread, #THREAD_LAST_JAVA_SP]	
+        str     r1, [thread, #THREAD_LAST_JAVA_FP]
+        str     r2, [thread, #THREAD_JAVA_SP]
+        str     Rframe, [thread, #THREAD_LAST_JAVA_SP] // FIXME: Don't understand this
+        ldr     r3, [thread, #4]
+        cmp     r3, #0
+        bne     istub_exception
+	mov	r0, #0
+        ldmia   sp!, {ip, pc}
+
+@ R0 = BCI
+@ R1 = index
 	.global	Thumb2_getfield_word_stub
 	.type Thumb2_getfield_word_stub, %function
 Thumb2_getfield_word_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -5927,7 +5959,8 @@
 	.global	Thumb2_getfield_sh_stub
 	.type Thumb2_getfield_sh_stub, %function
 Thumb2_getfield_sh_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -5960,7 +5993,8 @@
 	.global	Thumb2_getfield_h_stub
 	.type Thumb2_getfield_h_stub, %function
 Thumb2_getfield_h_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -5993,7 +6027,8 @@
 	.global	Thumb2_getfield_sb_stub
 	.type Thumb2_getfield_sb_stub, %function
 Thumb2_getfield_sb_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -6026,7 +6061,8 @@
 	.global	Thumb2_getfield_dw_stub
 	.type Thumb2_getfield_dw_stub, %function
 Thumb2_getfield_dw_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -6044,7 +6080,7 @@
 	stmdb	stack!, {r2, r3}	@ PUSH r2, r3
 	bx	lr
 3:
-	ldrd	r2, r3, [r3, ip]
+	ldrd	r2, r3, [r3, ip]	// FIXME: Should be ldrexd
 	FullBarrier
 	stmdb	stack!, {r2, r3}	@ PUSH r2, r3
 	bx	lr
@@ -6070,42 +6106,59 @@
 	mov	r2, #opc_getfield
 field_stub_unresolved:
 	stmdb	sp!, {r0, r1, ip, lr}
-        ldr     ip, [istate, #ISTATE_METHOD]
+	ldr	lr, [thread, #THREAD_TOP_ZERO_FRAME]
+        ldr     ip, [lr, #FRAME_METHOD]
 	sub	r3, stack, #4
 	ldr	ip, [ip, #METHOD_CONSTMETHOD]
-	str	r3, [istate, #ISTATE_STACK]	@ DECACHE_STACK
+	str	r3, [lr, #FRAME_STACK]	@ DECACHE_STACK
 	add	r3, ip, r0
-	str	r3, [istate, #ISTATE_BCP]	@ DECACHE_JPC
+	str	r3, [lr, #FRAME_BCP]	@ DECACHE_JPC
 	ldr	ip, resolve_get_put_adcon
 	mov	r1, r2
-	ldr	r0, [istate, #ISTATE_THREAD]
+	mov	r0, thread
 	blx	ip
 	ldmia	sp!, {r0, r1, ip, lr}
-	ldr	r3, [Rthread, #4]
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r3, [thread, #4]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	cmp	r3, #0
 	bne	field_exception
 	add	r2, r2, r1, lsl #4
 	bx	lr
 
 field_null_ptr_exception:
-        ldr     ip, [istate, #ISTATE_METHOD]
-        ldr     ip, [ip, #METHOD_CONSTMETHOD]
+	stmdb	sp!, {JAZ_REGSET}	@ spill JAZ registers; their address is handed to lr_to_bci in r2
+	ldr	ip, [thread, #THREAD_TOP_ZERO_FRAME]
+        ldr     r1, [ip, #FRAME_METHOD]
+	ldr	r3, [ip, #FRAME_LOCALS]
+        ldr     ip, [r1, #METHOD_CONSTMETHOD]
         add     jpc, ip, r0
-	mov	r0, #VMSYMBOLS_NullPointerException
-	ldr	ip, raise_exception_adcon
-	bx	ip
-
-field_exception:
-	ldr	ip, handle_exception_adcon
-	bx	ip
+	bic	r0, lr, #TBIT
+	mov	r2, sp
+
+@ jpc has already been set from the BCI passed in r0, so lr_to_bci is called
+@ only to save the locals (r0 = lr without TBIT, r1 = method, r2 = sp, r3 = locals).
+	ldr	ip, lr_to_bci_adcon
+	blx	ip
+@ r0 now holds the ignored lr_to_bci result, so load the exception symbol only here
+	add	sp, sp, #JAZ_REGSET_LEN * 4
+ 	mov	r0, #VMSYMBOLS_NullPointerException
+ 	ldr	ip, raise_exception_adcon
+	LOAD_ISTATE
+ 	bx	ip
+ 
+ field_exception:
+ 	ldr	ip, handle_exception_adcon
+	LOAD_ISTATE
+ 	bx	ip
 
 @ R0 = BCI
 @ R1 = index
 	.global	Thumb2_putfield_word_stub
 	.type Thumb2_putfield_word_stub, %function
 Thumb2_putfield_word_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6140,7 +6193,8 @@
 	.global	Thumb2_putfield_h_stub
 	.type Thumb2_putfield_h_stub, %function
 Thumb2_putfield_h_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6175,7 +6229,8 @@
 	.global	Thumb2_putfield_b_stub
 	.type Thumb2_putfield_b_stub, %function
 Thumb2_putfield_b_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6210,7 +6265,8 @@
 	.global	Thumb2_putfield_a_stub
 	.type Thumb2_putfield_a_stub, %function
 Thumb2_putfield_a_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6249,7 +6305,8 @@
 	.global	Thumb2_putfield_dw_stub
 	.type Thumb2_putfield_dw_stub, %function
 Thumb2_putfield_dw_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6272,6 +6329,7 @@
 	beq	field_null_ptr_exception
 
 	StoreStoreBarrier
+	// FIXME: This should use strexd on an MP system
 	strd	r2,r3, [ip, r1]
 	StoreLoadBarrier
 	bx	lr
@@ -6286,7 +6344,8 @@
 	.global	Thumb2_getstatic_word_stub
 	.type Thumb2_getstatic_word_stub, %function
 Thumb2_getstatic_word_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -6318,7 +6377,8 @@
 	.global	Thumb2_getstatic_h_stub
 	.type Thumb2_getstatic_h_stub, %function
 Thumb2_getstatic_h_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -6350,7 +6410,8 @@
 	.global	Thumb2_getstatic_sh_stub
 	.type Thumb2_getstatic_sh_stub, %function
 Thumb2_getstatic_sh_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -6382,7 +6443,8 @@
 	.global	Thumb2_getstatic_sb_stub
 	.type Thumb2_getstatic_sb_stub, %function
 Thumb2_getstatic_sb_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -6414,7 +6476,8 @@
 	.global	Thumb2_getstatic_dw_stub
 	.type Thumb2_getstatic_dw_stub, %function
 Thumb2_getstatic_dw_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0x00ff0000
@@ -6435,6 +6498,7 @@
 
 	ldrd	r2, r3, [r3, ip]
 	FullBarrier
+	// FIXME: The ldrd above should be ldrexd on an MP system
 	stmdb	stack!, {r2, r3}	@ PUSH r2, r3
 	bx	lr
 1:
@@ -6448,7 +6512,8 @@
 	.global	Thumb2_putstatic_word_stub
 	.type Thumb2_putstatic_word_stub, %function
 Thumb2_putstatic_word_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6483,7 +6548,8 @@
 	.global	Thumb2_putstatic_h_stub
 	.type Thumb2_putstatic_h_stub, %function
 Thumb2_putstatic_h_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6518,7 +6584,8 @@
 	.global	Thumb2_putstatic_b_stub
 	.type Thumb2_putstatic_b_stub, %function
 Thumb2_putstatic_b_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6553,7 +6620,8 @@
 	.global	Thumb2_putstatic_dw_stub
 	.type Thumb2_putstatic_dw_stub, %function
 Thumb2_putstatic_dw_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6588,7 +6656,8 @@
 	.global	Thumb2_putstatic_a_stub
 	.type Thumb2_putstatic_a_stub, %function
 Thumb2_putstatic_a_stub:
-	ldr	r2, [istate, #ISTATE_CONSTANTS]
+	ldr	r2, [thread, #THREAD_TOP_ZERO_FRAME]
+	ldr	r2, [r2, #FRAME_CONSTANTS]
 	add	r2, r2, r1, lsl #4
 	ldr	r3, [r2, #CP_OFFSET]
 	and	r3, r3, #0xff000000
@@ -6622,7 +6691,7 @@
 	mov	lr, ip
 	b	2b
 
-#endif // THUMB2EE
+#endif // T2JIT
 
 	.global	Thumb2_stubs_end
 	.type Thumb2_stubs_end, %function
@@ -7083,21 +7152,10 @@
 dc_18:
 	.word     0x38e38e39
 
-#define TBIT 1
-
 	.global	Thumb2_DivZero_Handler
 	.type Thumb2_DivZero_Handler, %function
 Thumb2_DivZero_Handler:
-#ifdef THUMB2EE
-
-#define JAZ_V1	r5
-#define JAZ_V2	r6
-#define JAZ_V3	r7
-#define	JAZ_V4	r10
-#define	JAZ_V5	r11
-
-#define JAZ_REGSET	JAZ_V1,JAZ_V2,JAZ_V3,JAZ_V4,JAZ_V5,ip
-#define JAZ_REGSET_LEN	6
+#ifdef T2JIT
 
 	adrl	r0, idiv_clz_ret
 	cmp	r0, lr
@@ -7105,22 +7163,24 @@
 	cmpne	r0, lr
 	beq	divide_by_zero_exception
 	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
 	bic	r0, lr, #TBIT
-	ldr	r1, [istate, #ISTATE_METHOD]
+	ldr	r1, [Rframe, #FRAME_METHOD]
         ldr     jpc, [r1, #METHOD_CONSTMETHOD]
 	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
 	mov	r2, sp
-	ldr	r3, [istate, #ISTATE_LOCALS]
+	ldr	r3, [Rframe, #FRAME_LOCALS]
 	bl	Thumb2_lr_to_bci
 	add	sp, sp, #JAZ_REGSET_LEN * 4
 	cmp	r0, #-1
 	moveq	jpc, #0
 	addne	jpc, jpc, r0
 	bl	load_dispatch
-#endif // THUMB2EE
+	LOAD_ISTATE
+#endif // T2JIT
 	b	divide_by_zero_exception
 
-#ifdef THUMB2EE
+#ifdef T2JIT
 
 	.global	Thumb2_Handle_Exception
 	.type Thumb2_Handle_Exception, %function
@@ -7134,12 +7194,13 @@
 	.type Thumb2_Stack_Overflow, %function
 Thumb2_ArrayBounds_Handler:
 	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
 	bic	r0, lr, #TBIT
-	ldr	r1, [istate, #ISTATE_METHOD]
+	ldr	r1, [Rframe, #FRAME_METHOD]
         ldr     jpc, [r1, #METHOD_CONSTMETHOD]
 	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
 	mov	r2, sp
-	ldr	r3, [istate, #ISTATE_LOCALS]
+	ldr	r3, [Rframe, #FRAME_LOCALS]
 	bl	Thumb2_lr_to_bci
 	add	sp, sp, #JAZ_REGSET_LEN * 4
 	cmp	r0, #-1
@@ -7147,25 +7208,32 @@
 	addne	jpc, jpc, r0
 	bl	load_dispatch
 	mov	r0, #VMSYMBOLS_ArrayIndexOutOfBounds
+	LOAD_ISTATE
 	b	raise_exception
 Thumb2_Handle_Exception:
 	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
 	bic	r0, lr, #TBIT
-	ldr	r1, [istate, #ISTATE_METHOD]
+	ldr	r1, [Rframe, #FRAME_METHOD]
         ldr     jpc, [r1, #METHOD_CONSTMETHOD]
 	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
 	mov	r2, sp
-	ldr	r3, [istate, #ISTATE_LOCALS]
+	ldr	r3, [Rframe, #FRAME_LOCALS]
 	bl	Thumb2_lr_to_bci
 	add	sp, sp, #JAZ_REGSET_LEN * 4
 	cmp	r0, #-1
 	moveq	jpc, #0
 	addne	jpc, jpc, r0
 	bl	load_dispatch
+	LOAD_ISTATE
 	b	handle_exception
 Thumb2_Handle_Exception_NoRegs:
+	LOAD_FRAME
+	ldr	r0, [Rframe, #FRAME_STACK_LIMIT]
+	add	r0, r0, #4
+	str	r0, [thread, #THREAD_JAVA_SP]
 	bic	r0, lr, #TBIT
-	ldr	r1, [istate, #ISTATE_METHOD]
+	ldr	r1, [Rframe, #FRAME_METHOD]
         ldr     jpc, [r1, #METHOD_CONSTMETHOD]
 	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
 	mov	r2, #0
@@ -7174,25 +7242,28 @@
 	moveq	jpc, #0
 	addne	jpc, jpc, r0
 	bl	load_dispatch
+	LOAD_ISTATE
 	b	handle_exception
 Thumb2_NullPtr_Handler:
 	stmdb	sp!, {JAZ_REGSET}
+	LOAD_FRAME
 	bic	r0, lr, #TBIT
-	ldr	r1, [istate, #ISTATE_METHOD]
+	ldr	r1, [Rframe, #FRAME_METHOD]
         ldr     jpc, [r1, #METHOD_CONSTMETHOD]
 	add	jpc, jpc, #CONSTMETHOD_CODEOFFSET
 	mov	r2, sp
-	ldr	r3, [istate, #ISTATE_LOCALS]
+	ldr	r3, [Rframe, #FRAME_LOCALS]
 	bl	Thumb2_lr_to_bci
 	add	sp, sp, #JAZ_REGSET_LEN * 4
 	cmp	r0, #-1
 	moveq	jpc, #0
 	addne	jpc, jpc, r0
 	bl	load_dispatch
+	LOAD_ISTATE
 	b	null_ptr_exception
 
 Thumb2_Stack_Overflow:
-	mov	r0, r2
+	mov	r0, thread
 	mov	r2, #0
 	str	r2, [r0, #THREAD_LAST_JAVA_SP]
 	ldr	ip, [r0, #THREAD_TOP_ZERO_FRAME]
@@ -7206,12 +7277,29 @@
 	.global	Thumb2_Exit_To_Interpreter
 	.type Thumb2_Exit_To_Interpreter, %function
 Thumb2_Exit_To_Interpreter:
+	LOAD_ISTATE
 	bl	load_dispatch
 	sub	stack, stack, #4
 	CACHE_CP
 	CACHE_LOCALS
 	DISPATCH	0
 
+	.global	Thumb2_monitorenter
+Thumb2_monitorenter:	@ calls Helper_monitorenter; returns through lr when the helper returns 0
+	stmdb	sp!, {ip, lr}	@ keep ip and the return address across the helper call
+	sub	stack, stack, #4
+	mov	r0, r8	@ first helper argument; r8 is presumably istate - TODO confirm
+	POP	r1
+	DECACHE_JPC
+	DECACHE_STACK
+	bl	Helper_monitorenter
+	CACHE_STACK		@ monitorenter may expand stack!!!
+	ldmia	sp!, {ip, lr}
+	cmp	r0, #0	@ a nonzero helper result is routed to handle_exception
+	bne	handle_exception
+	add	stack, stack, #4
+	bx	lr
+
 	.global	Thumb2_Clear_Cache
 	.type Thumb2_Clear_Cache, %function
 Thumb2_Clear_Cache:
@@ -7223,7 +7311,7 @@
 	ldmia	sp!, {r7}
 	bx	lr
 
-#endif // THUMB2EE
+#endif // T2JIT
 
 	.section	.init_array,"aw",%init_array
 	.word	bci_init(target1)
--- a/src/cpu/zero/vm/thumb2.cpp	Wed May 16 11:21:07 2012 +0100
+++ b/src/cpu/zero/vm/thumb2.cpp	Thu May 17 13:45:50 2012 -0400
@@ -19,12 +19,12 @@
 
 #ifdef __arm__
 
-#undef THUMB2EE
+#undef T2JIT
 #if !defined(DISABLE_THUMB2) && defined(HOTSPOT_ASM) && !defined(SHARK)
-#define THUMB2EE
+#define T2JIT
 #endif
 
-#ifdef THUMB2EE
+#ifdef T2JIT
 
 // setting DISABLE_THUMB2_JVMTI at build time disables notification
 // of JVMTI dynamic_generate and compiled_method_load events
@@ -33,24 +33,33 @@
 #define THUMB2_JVMTI
 #endif
 
-#define T2EE_PRINT_COMPILATION
-#define T2EE_PRINT_STATISTICS
-#define T2EE_PRINT_DISASS
+#define T2_PRINT_COMPILATION
+#define T2_PRINT_STATISTICS
+#define T2_PRINT_DISASS
+#define T2_PRINT_REGUSAGE
+
 #define T2EE_PRINT_REGUSAGE
-
-#ifdef T2EE_PRINT_COMPILATION
-static char *t2ee_print_compilation;
+#define CODE_ALIGN 64
+
+#define SLOW_ENTRY_OFFSET 24
+#define FAST_ENTRY_OFFSET 40
+
+#ifdef T2_PRINT_STATISTICS
+static char *t2_print_statistics;
 #endif
 
-#ifdef T2EE_PRINT_STATISTICS
-static char *t2ee_print_statistics;
+#ifdef T2_PRINT_REGUSAGE
+static char *t2_print_regusage;
 #endif
 
-#ifdef T2EE_PRINT_REGUSAGE
-static char *t2ee_print_regusage;
+static char *t2_ospace;
+#define OSPACE t2_ospace
+
+#ifdef PRODUCT
+#define THUMB2_CODEBUF_SIZE (8 * 1024 * 1024)
+#else
+#define THUMB2_CODEBUF_SIZE (4 * 1024 * 1024)
 #endif
-
-#define THUMB2_CODEBUF_SIZE (8 * 1024 * 1024)
 #define THUMB2_MAX_BYTECODE_SIZE 10000
 #define THUMB2_MAX_T2CODE_SIZE 65000
 #define THUMB2_MAXLOCALS 1000
@@ -346,17 +355,14 @@
 
 #define H_EXIT_TO_INTERPRETER		28
 
-#define H_GETSTATIC			H_EXIT_TO_INTERPRETER
-#define H_PUTSTATIC			H_EXIT_TO_INTERPRETER
-#define H_JSR				H_EXIT_TO_INTERPRETER
 #define H_RET				H_EXIT_TO_INTERPRETER
-#define H_ZOMBIE			H_EXIT_TO_INTERPRETER
-#define H_MONITOR			H_EXIT_TO_INTERPRETER
+#define H_DEADCODE			H_EXIT_TO_INTERPRETER
 #define H_ATHROW			H_EXIT_TO_INTERPRETER
 
 #define H_HANDLE_EXCEPTION		29
 #define H_ARRAYBOUND			30
-#define H_UNKNOWN			31
+
+#define H_LDC_W				31
 
 #define H_DEBUG_METHODENTRY		32
 #define H_DEBUG_METHODEXIT		33
@@ -395,9 +401,17 @@
 
 #define H_HANDLE_EXCEPTION_NO_REGS	60
 
-#define H_SAFEPOINT              	61
-
-#define H_LAST                          62  // Not used
+#define H_INVOKESTATIC_RESOLVED		61
+#define H_INVOKESPECIAL_RESOLVED	62
+#define H_INVOKEVIRTUAL_RESOLVED	63
+#define H_INVOKEVFINAL			64
+
+#define H_MONITORENTER			65
+#define H_MONITOREXIT			66
+
+#define H_SAFEPOINT              	67
+
+#define H_LAST                          68  // Not used
 
 unsigned handlers[H_LAST];
 
@@ -520,11 +534,13 @@
 }
 #endif // THUMB2_JVMTI
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
 short start_bci[THUMB2_MAX_T2CODE_SIZE];
 short end_bci[THUMB2_MAX_T2CODE_SIZE];
 #endif
 
+bool DebugSwitch = false;
+
 // XXX hardwired constants!
 #define ENTRY_FRAME             1
 #define INTERPRETER_FRAME       2
@@ -533,16 +549,13 @@
 
 #include "offsets_arm.s"
 
-#define BC_FLAGS_MASK		0xfc000000
-#define BC_VISITED_P1		0x80000000
+#define BC_FLAGS_MASK		0xf0000000
+#define BC_VISITED		0x80000000
 #define BC_BRANCH_TARGET	0x40000000
 #define BC_COMPILED		0x20000000
-#define BC_VISITED_P2		0x10000000
-#define BC_ZOMBIE		0x08000000
-#define BC_BACK_TARGET		0x04000000
-
-#define IS_DEAD(x)	(((x) & BC_VISITED_P1) == 0)
-#define IS_ZOMBIE(x)	(((x) & BC_ZOMBIE) || ((x) & BC_VISITED_P2) == 0)
+#define BC_BACK_TARGET		0x10000000
+
+#define IS_DEAD(x)	(((x) & BC_VISITED) == 0)
 
 #define LOCAL_MODIFIED		31
 #define LOCAL_REF		30
@@ -606,22 +619,21 @@
 #define VFP_D6		70
 #define VFP_D7		71
 
-#define PREGS	5
+#define PREGS	6
 
 #define JAZ_V1	ARM_R5
 #define JAZ_V2	ARM_R6
 #define JAZ_V3	ARM_R7
-#define JAZ_V4	ARM_R10
-#define JAZ_V5	ARM_R11
+#define JAZ_V4	ARM_R8
+#define JAZ_V5	ARM_R9
+#define JAZ_V6	ARM_R11
 
 #define Rstack		ARM_R4
 #define Rlocals		ARM_R7
 #define Ristate		ARM_R8
-#define Rthread		ARM_R9
-
-#define Rint_stack	ARM_R4
+#define Rthread		ARM_R10
+
 #define Rint_jpc	ARM_R5
-#define Rint_istate	ARM_R8
 
 #define IS_ARM_INT_REG(r) ((r) <= ARM_PC)
 #define IS_ARM_FP_REG(r) (!IS_ARM_INT_REG(r))
@@ -648,9 +660,10 @@
     //		.align	CODE_ALIGN
     // slow_entry:				@ callee save interface
     // 		push	{r4, r5, r6, r7, r9, r10, r11, lr}
+    // 		mov	Rthread, r2
     // 		bl	fast_entry
     // 		pop	{r4, r5, r6, r7, r9, r10, r11, pc}
-    unsigned slow_entry[3];
+    unsigned slow_entry[4];
     unsigned *osr_table;			// pointer to the osr table
     unsigned *exception_table;
     Compiled_Method *next;
@@ -658,20 +671,7 @@
     // This is used when receovering from an exception so we can push
     // the register back into the local variables pool.
     short regusage[6];
-    // OSR Entry point:
-    // 	R0 = entry point within compiled method
-    // 	R1 = locals - 4000 * 4
-    // 	R2 = thread
-    // 	R3 = locals - 31 * 4
-    // osr_entry:
-    // 		@ Load each local into it register allocated register
-    // 		ldr	<reg>, [R1, #(4000-<local>) * 4]
-    //    or	ldr	<reg>, [R3, #(31-<local>) * 4]
-    // 		...
-    // 		mov	Rthread, R2
-    // 		bx	R0
-    // 		.align	CODE_ALIGN
-    unsigned osr_entry[1];
+    unsigned header_end[1];
     // fast_entry:
     // 		push	{r8, lr}
     // 		...	@ The compiled code
@@ -691,7 +691,7 @@
 
 typedef struct Thumb2_Entrypoint {
   unsigned compiled_entrypoint;
-  unsigned osr_entry;
+  short *regusage;
 } Thumb2_Entrypoint;
 
 typedef struct CodeBuf {
@@ -725,8 +725,9 @@
     Thumb2_Stack *jstack;
     Thumb2_Registers *jregs;
     unsigned compiled_return;
-    unsigned zombie_bytes;
+    unsigned compiled_word_return[12];  // R0 .. R11
     unsigned is_leaf;
+    unsigned use_istate;
 } Thumb2_Info;
 
 #define IS_INT_SIZE_BASE_TYPE(c) (c=='B' || c=='C' || c=='F' || c=='I' || c=='S' || c=='Z')
@@ -734,6 +735,7 @@
 
 void Thumb2_save_locals(Thumb2_Info *jinfo, unsigned stackdepth);
 void Thumb2_restore_locals(Thumb2_Info *jinfo, unsigned stackdepth);
+void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth);
 
 static int method_stackchange(const jbyte *base)
 {
@@ -801,7 +803,7 @@
 
 static const char *local_types[] = { "int", "long", "float", "double", "ref" };
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
 
 class Hsdis {
 public:
@@ -832,7 +834,7 @@
       }
 
       if (! (decode_instructions)) {
-	fprintf (stderr, "PrintAssembly (or T2EE_PRINT_DISASS) is set, but\n"
+	fprintf (stderr, "PrintAssembly (or T2_PRINT_DISASS) is set, but\n"
 		 "hsdis-arm.so has not been found or is invalid.  If you want to\n"
 		 "see a disassembly, please ensure that a valid copy of\n"
 		 "hsdis-arm.so is present somewhere in your library load path.\n");
@@ -861,20 +863,6 @@
 
   static Hsdis hsdis;
 
-#if 0
-  printf("Local Variable Usage\n");
-  printf("====================\n");
-  for (idx = 0; idx < nlocals; idx++) {
-    unsigned linfo = locals_info[idx];
-    unsigned typ = (linfo >> LOCAL_INT) & 0x1f;
-
-    printf("Local %d, type = %s (%x)", idx, typ ? local_types[LOG2(typ)] : "!!!unknown!!!", typ);
-    if (linfo & (1 << LOCAL_MODIFIED)) printf(", modified");
-    if (idx < (unsigned)jinfo->method->size_of_parameters()) printf(", parameter");
-    putchar('\n');
-  }
-#endif
-
   fflush(stdout);
   fflush(stderr);
 
@@ -891,10 +879,10 @@
 	int len;
 
 	if (stackinfo & BC_BRANCH_TARGET)
-	  printf("----- Basic Block -----\n");
+	  fprintf(stderr, "----- Basic Block -----\n");
 	JASSERT(bci > last_bci, "disass not advancing");
 	last_bci = bci;
-	printf("%c%4d : ", (stackinfo & BC_VISITED_P1) ? ' ' : '?', bci);
+	fprintf(stderr, "%c%4d : ", (stackinfo & BC_VISITED) ? ' ' : '?', bci);
 	opcode = code_base[bci];
 	if (opcode > OPC_LAST_JAVA_OP) {
 	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
@@ -916,11 +904,11 @@
 	    int def;
 	    unsigned n, i;
 
-	    printf("%02x ", opcode);
+	    fprintf(stderr, "%02x ", opcode);
 	    for (int i = 1; i < 5; i++)
-	      printf("   ");
-	    printf("%s\n", Bytecodes::name((Bytecodes::Code)opcode));
-	    printf("\t%d bytes padding\n", nbci - (bci+1));
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
 	    w = *(unsigned int *)(code_base + nbci + 4);
 	    low = (int)BYTESEX_REVERSE(w);
 	    w = *(unsigned int *)(code_base + nbci + 8);
@@ -928,16 +916,16 @@
 	    w = *(unsigned int *)(code_base + nbci + 0);
 	    def = (int)BYTESEX_REVERSE(w);
 	    table = (unsigned int *)(code_base + nbci + 12);
-	    printf("\tdefault:\t0x%08x\n", def);
-	    printf("\tlow:\t\t0x%08x\n", low);
-	    printf("\thigh:\t\t0x%08x\n", high);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tlow:\t\t0x%08x\n", low);
+	    fprintf(stderr, "\thigh:\t\t0x%08x\n", high);
 	    n = high - low + 1;
 	    while (low <= high) {
 	      int off;
 
 	      w = *table++;
 	      off = (int)BYTESEX_REVERSE(w);
-	      printf("\toffset %d:\t0x%08x\n", low, off);
+	      fprintf(stderr, "\toffset %d:\t0x%08x\n", low, off);
 	      low++;
 	    }
 	    bci += len;
@@ -945,7 +933,7 @@
 	      // The insn sequence generated by tableswitch is 14
 	      // bytes long.
 	      const int tableswitch_code_len = 14;
-	      printf("0x%08x:\t", (int)codebuf+idx);
+	      fprintf(stderr, "0x%08x:\t", (int)codebuf+idx);
 		unsigned short *p = codebuf + idx/2;
 		hsdis.decode_instructions((char*)p,
 					  (char *)p + tableswitch_code_len,
@@ -954,7 +942,7 @@
 		idx += tableswitch_code_len;
 	    }
 	    for (i = 0; i < n; i++) {
-	      printf("0x%08x:\t.short\t0x%04x\n", (int)codebuf+idx, *(short *)((int)codebuf + idx));
+	      fprintf(stderr, "0x%08x:\t.short\t0x%04x\n", (int)codebuf+idx, *(short *)((int)codebuf + idx));
 	      idx += 2;
 	    }
 	    nodisass = 1;
@@ -967,19 +955,19 @@
 	    int npairs;	// The Java spec says signed but must be >= 0??
 	    unsigned *table;
 
-	    printf("%02x ", opcode);
+	    fprintf(stderr, "%02x ", opcode);
 	    for (int i = 1; i < 5; i++)
-	      printf("   ");
-	    printf("%s\n", Bytecodes::name((Bytecodes::Code)opcode));
-	    printf("\t%d bytes padding\n", nbci - (bci+1));
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
 
 	    w = *(unsigned int *)(code_base + nbci + 0);
 	    def = (int)BYTESEX_REVERSE(w);
 	    w = *(unsigned int *)(code_base + nbci + 4);
 	    npairs = (int)BYTESEX_REVERSE(w);
 	    table = (unsigned int *)(code_base + nbci + 8);
-	    printf("\tdefault:\t0x%08x\n", def);
-	    printf("\tnpairs:\t\t0x%08x\n", npairs);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tnpairs:\t\t0x%08x\n", npairs);
 	    for (int i = 0; i < npairs; i++) {
 	      unsigned match, off;
 	      w = table[0];
@@ -987,7 +975,7 @@
 	      w = table[1];
 	      table += 2;
 	      off = BYTESEX_REVERSE(w);
-	      printf("\t  match: 0x%08x, offset: 0x%08x\n", match, off);
+	      fprintf(stderr, "\t  match: 0x%08x, offset: 0x%08x\n", match, off);
 	    }
 	    break;
 	  }
@@ -995,11 +983,11 @@
 	  default:
 	    for (int i = 0; i < 5; i++) {
 	      if (i < len)
-		printf("%02x ", code_base[bci+i]);
+		fprintf(stderr, "%02x ", code_base[bci+i]);
 	      else
-		printf("   ");
+		fprintf(stderr, "   ");
 	    }
-	    printf("%s\n", Bytecodes::name((Bytecodes::Code)code_base[bci]));
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)code_base[bci]));
 	    break;
 	}
 	bci += len;
@@ -1014,10 +1002,10 @@
 	s2 = *(unsigned short *)((int)codebuf + idx + 2);
 	if (s1 == T_UNDEFINED_16 || ((s1 << 16) + s2) == T_UNDEFINED_32) {
 	  if (s1 == T_UNDEFINED_16) {
-	    printf("undefined (0xde00) - UNPATCHED BRANCH???");
+	    fprintf(stderr, "undefined (0xde00) - UNPATCHED BRANCH???");
 	    len = 2;
 	  } else {
-	    printf("undefined (0xf7f0a000) - UNPATCHED BRANCH???");
+	    fprintf(stderr, "undefined (0xf7f0a000) - UNPATCHED BRANCH???");
 	    len = 4;
 	  }
 	} else {
@@ -1027,22 +1015,22 @@
 		 && start_bci[(len + idx)/2] == -1)
 	    len += 2;
 	  hsdis.decode_instructions((char*)p, (char*)p + len,
-				      print_address, NULL, NULL, stdout,
+				      print_address, NULL, NULL, stderr,
 				      "force-thumb");
 	}
 	idx += len;
       }
     }
   }
-  fflush(stdout);
+  fflush(stderr);
 }
 // where
 static void *print_address(void *, const char *tag, void *data) {
   if (strcmp(tag, "insn") == 0)
-    printf("0x%08x:\t", (unsigned int)data);
+    fprintf(stderr, "0x%08x:\t", (unsigned int)data);
   return NULL;
 }
-#endif
+#endif // T2_PRINT_DISASS
 
 #define BCI(len, pop, push, special, islocal, islocal_n, isstore, local_n, local_type) \
 	((len) | ((pop)<<3) | ((push)<<6) | (unsigned)((special) << 31) | ((islocal) << 30) | ((islocal_n) << 29) | ((isstore) << 28) | ((local_n) << 9) | ((local_type) << 11))
@@ -1323,7 +1311,7 @@
 	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iload_N
 };
 
-void Thumb2_pass1(Thumb2_Info *jinfo, unsigned bci)
+void Thumb2_pass1(Thumb2_Info *jinfo, unsigned stackdepth, unsigned bci)
 {
   unsigned code_size = jinfo->code_size;
   jubyte *code_base = jinfo->code_base;
@@ -1337,399 +1325,13 @@
     unsigned bytecodeinfo;
     unsigned opcode;
 
-    if (stackinfo & BC_VISITED_P1) break;
-    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | BC_VISITED_P1;
+    if (stackinfo & BC_VISITED) break;
+    JASSERT((int)stackdepth >= 0, "stackdepth < 0!!");
+    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | stackdepth | BC_VISITED;
     opcode = code_base[bci];
 //	printf("bci = 0x%04x, opcode = 0x%02x (%s)", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode));
     bytecodeinfo = bcinfo[opcode];
     if (!BCI_SPECIAL(bytecodeinfo)) {
-      bci += BCI_LEN(bytecodeinfo);
-      continue;
-    }
-
-    switch (opcode) {
-
-      case opc_goto: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	bci += off;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-      case opc_goto_w: {
-	int off = GET_JAVA_U4(code_base+bci+1);
-	bci += off;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_if_icmpeq:
-      case opc_if_icmpne:
-      case opc_if_icmplt:
-      case opc_if_icmpge:
-      case opc_if_icmpgt:
-      case opc_if_icmple:
-      case opc_if_acmpeq:
-      case opc_if_acmpne:
-      case opc_ifeq:
-      case opc_ifne:
-      case opc_iflt:
-      case opc_ifge:
-      case opc_ifgt:
-      case opc_ifle:
-      case opc_ifnull:
-      case opc_ifnonnull: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	Thumb2_pass1(jinfo, bci + off);
-	bci += 3;
-	break;
-      }
-
-    case opc_invokedynamic:
-      bci += 5;
-      break;
-    case opc_fast_aldc_w:
-      bci += 3;
-      break;
-    case opc_fast_aldc:
-      bci += 2;
-      break;
-
-      case opc_jsr: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	Thumb2_pass1(jinfo, bci + off);
-	bci += 3;
-	break;
-      }
-      case opc_jsr_w: {
-	int off = GET_JAVA_U4(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	Thumb2_pass1(jinfo, bci + off);
-	bci += 5;
-	break;
-      }
-
-      case opc_ireturn:
-      case opc_lreturn:
-      case opc_freturn:
-      case opc_dreturn:
-      case opc_areturn:
-      case opc_return:
-      case opc_return_register_finalizer:
-      case opc_ret:
-      case opc_athrow:
-	// The test for BC_VISITED_P1 above will break out of the loop!!!
-	break;
-
-      case opc_tableswitch: {
-	int low, high;
-	unsigned w;
-	unsigned *table;
-	unsigned nbci;
-	int def;
-
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 8);
-	low = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 12);
-	high = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 16);
-
-	while (low <= high) {
-	  int off;
-	  w = *table++;
-	  off = (int)BYTESEX_REVERSE(w);
-	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	  Thumb2_pass1(jinfo, bci + off);
-	  low++;
-	}
-
-	bci += def;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_lookupswitch: {
-	unsigned w;
-	unsigned nbci;
-	int def;
-	int npairs;	// The Java spec says signed but must be >= 0??
-	unsigned *table;
-
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 8);
-	npairs = (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 16);
-
-	for (int i = 0; i < npairs; i++) {
-	  int off;
-	  w = *table;
-	  table += 2;
-	  off = (int)BYTESEX_REVERSE(w);
-	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	  Thumb2_pass1(jinfo, bci + off);
-	}
-
-	bci += def;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_getstatic:
-      case opc_putstatic:
-      case opc_getfield:
-      case opc_putfield: {
-	bci += 3;
-	break;
-      }
-
-      case opc_invokeresolved:
-      case opc_invokespecialresolved:
-      case opc_invokestaticresolved:
-      case opc_invokevfinal:
-      case opc_invokevirtual:
-      case opc_invokespecial:
-      case opc_invokestatic:
-	jinfo->is_leaf = 0;
-	bci += 3;
-	break;
-
-      case opc_invokeinterface:
-	jinfo->is_leaf = 0;
-	bci += 5;
-	break;
-
-      case opc_multianewarray:
-	bci += 4;
-	break;
-
-      case opc_wide:
-	opcode = code_base[bci+1];
-	if (opcode == opc_iinc) {
-	  bci += 6;
-	} else {
-	  bci += 4;
-	}
-	break;
-
-      default:
-	opcode = code_base[bci];
-	fatal(err_msg("Undefined opcode %d\n", opcode));
-	break;
-    }
-  }
-}
-
-#ifdef ZOMBIE_DETECTION
-int Thumb2_is_zombie(Thumb2_Info *jinfo, unsigned bci)
-{
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned bytecodeinfo;
-  unsigned opcode;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-
-  do {
-    opcode = code_base[bci];
-    // Short circuit exit - commented out because even if it has been executed
-    // we treat throw, jsr, and ret as zombies because they will call out to the
-    // interpreter.
-    // if (opcode > OPC_LAST_JAVA_OP) return 0;
-    bytecodeinfo = bcinfo[opcode];
-    if (!BCI_SPECIAL(bytecodeinfo)) {
-	bci += BCI_LEN(bytecodeinfo);
-#if 0
-	if (opcode >= opc_iload_iload) {
-	  opcode = code_base[bci];
-	  bci += BCI_LEN(bcinfo[opcode]);
-	} else if (BCI_ISLOCAL(bytecodeinfo)) {
-	  if (opcode == opc_iload || (opcode >= opc_iload_0 && opcode <= opc_iload_3)) {
-	    opcode = code_base[bci];
-	    if (opcode == opc_iload || (opcode >= opc_iload_0 && opcode <= opc_iload_3) ||
-					(opcode >= opc_iconst_m1 && opcode <= opc_iconst_5)) {
-		printf("found new zombie at %d\n", bci);
-		return 1;
-	    }
-	  }
-	} else if (opcode == opc_iadd || opcode == opc_isub ||
-		      opcode == opc_iand || opcode == opc_ior || opcode == opc_ixor) {
-	    opcode = code_base[bci];
-	    if (opcode == opc_istore || (opcode >= opc_istore_0 && opcode <= opc_istore_3)) {
-		printf("found new zombie at %d\n", bci);
-		return 1;
-	    }
-	}
-#endif
-    } else {
-      switch (opcode) {
-	case opc_goto:
-	case opc_goto_w:
-	case opc_ifeq:
-	case opc_ifne:
-	case opc_iflt:
-	case opc_ifge:
-	case opc_ifgt:
-	case opc_ifle:
-	case opc_ifnull:
-	case opc_ifnonnull:
-	case opc_if_icmpeq:
-	case opc_if_icmpne:
-	case opc_if_icmplt:
-	case opc_if_icmpge:
-	case opc_if_icmpgt:
-	case opc_if_icmple:
-	case opc_if_acmpeq:
-	case opc_if_acmpne:
-	case opc_tableswitch:
-	case opc_lookupswitch:
-	  return 0;
-	case opc_ireturn:
-	case opc_lreturn:
-	case opc_freturn:
-	case opc_dreturn:
-	case opc_areturn:
-	case opc_return:
-	case opc_return_register_finalizer:
-	    return 0;
-	case opc_jsr:
-	case opc_jsr_w:
-	case opc_ret:
-	case opc_athrow:
-	    return 1;
-	case opc_invokeinterface:
-	case opc_invokevirtual:
-	case opc_invokespecial:
-	case opc_invokestatic:
-	case opc_putfield:
-	case opc_getfield:
-	case opc_putstatic:
-	case opc_getstatic: {
-	  constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-	  ConstantPoolCacheEntry* cache;
-	  int index = GET_NATIVE_U2(code_base+bci+1);
-
-	  cache = cp->entry_at(index);
-	  if (!cache->is_resolved((Bytecodes::Code)opcode)) return 1;
-	  bci += 3;
-	  if (opcode == opc_invokeinterface) bci += 2;
-	  break;
-
-	}
-	case opc_invokeresolved:
-	case opc_invokespecialresolved:
-	case opc_invokestaticresolved:
-	case opc_invokevfinal:
-	  bci += 3;
-	  break;
-
-	case opc_multianewarray:
-	  bci += 4;
-	  break;
-
-	case opc_wide:
-	  opcode = code_base[bci+1];
-	  if (opcode == opc_iinc) {
-	    bci += 6;
-	  } else {
-	    bci += 4;
-	  }
-	  break;
-
-	default:
-	  opcode = code_base[bci];
-	  fatal("Undefined opcode %d\n", opcode);
-	  break;
-      }
-    }
-    if (bci >= code_size) break;
-  } while (!(bc_stackinfo[bci] & BC_BRANCH_TARGET));
-  return 0;
-}
-#endif // ZOMBIE_DETECTION
-
-void Thumb2_RegAlloc(Thumb2_Info *jinfo)
-{
-  unsigned *locals_info = jinfo->locals_info;
-  unsigned i, j;
-  unsigned linfo;
-  unsigned score, max_score;
-  unsigned local;
-  unsigned nlocals = jinfo->method->max_locals();
-  unsigned *pregs = jinfo->jregs->pregs;
-  unsigned npregs = jinfo->jregs->npregs;
-
-  for (i = 0; i < npregs; i++) jinfo->jregs->mapping[i] = -1;
-  for (i = 0; i < npregs; i++) {
-    max_score = 0;
-    for (j = 0; j < nlocals; j++) {
-      linfo = locals_info[j];
-
-      if (linfo & ((1<<LOCAL_ALLOCATED)|(1<<LOCAL_DOUBLE))) continue;
-      score = LOCAL_READS(linfo) + LOCAL_WRITES(linfo);
-      if (linfo & (1<<LOCAL_MODIFIED)) score = (score+1) >> 2;
-      if (linfo & (1<<LOCAL_REF)) score = score - (score >> 2);
-      if (linfo & (1<<LOCAL_LONG)) score = (score+1) >> 2;
-      if (score > max_score) max_score = score, local = j;
-    }
-    if (max_score < 2) break;
-    locals_info[local] |= 1<<LOCAL_ALLOCATED;
-    jinfo->jregs->r_local[local] = pregs[i];
-    jinfo->jregs->mapping[i] = local;
-  }
-#ifdef T2EE_PRINT_REGUSAGE
-  if (t2ee_print_regusage) {
-    printf("Regalloc: %d physical registers allocated as follows\n", npregs);
-    for (j = 0; j < nlocals; j++) {
-      unsigned r = jinfo->jregs->r_local[j];
-      if (r) {
-	unsigned typ = (locals_info[j] >> LOCAL_INT) & 0x1f;
-	printf("  ARM Reg R%d -> local %d (type = %s)\n", r, j, local_types[LOG2(typ)]);
-      }
-    }
-  }
-#endif
-}
-
-void Thumb2_pass2(Thumb2_Info *jinfo, unsigned stackdepth, unsigned bci)
-{
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-  unsigned *locals_info = jinfo->locals_info;
-  unsigned check_zombie = 0;
-  //constantPoolCacheOop cp = jinfo->method->constants()->cache();
-
-  while (bci < code_size) {
-    unsigned stackinfo = bc_stackinfo[bci];
-    unsigned bytecodeinfo;
-    unsigned opcode;
-
-    if (stackinfo & BC_VISITED_P2) break;
-    JASSERT((int)stackdepth >= 0, "stackdepth < 0!!");
-    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | stackdepth | BC_VISITED_P2;
-#ifdef ZOMBIE_DETECTION
-    if (check_zombie || (stackinfo & BC_BRANCH_TARGET)) {
-      if (Thumb2_is_zombie(jinfo, bci)) {
-	printf("zombie code at %d\n", bci);
-	bc_stackinfo[bci] |= BC_ZOMBIE;
-	return;
-      }
-      check_zombie = 0;
-    }
-#endif
-    opcode = code_base[bci];
-//	printf("bci = 0x%04x, opcode = 0x%02x (%s), stackdepth = %d\n", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode), stackdepth);
-    bytecodeinfo = bcinfo[opcode];
-    if (!BCI_SPECIAL(bytecodeinfo)) {
       if (BCI_ISLOCAL(bytecodeinfo)) {
 	unsigned local = BCI_LOCAL_N(bytecodeinfo);
 	unsigned local_type = BCI_LOCAL_TYPE(bytecodeinfo) + LOCAL_INT;
@@ -1760,12 +1362,20 @@
 
     switch (opcode) {
 
-      case opc_goto:
-	bci += GET_JAVA_S2(code_base+bci+1);
+      case opc_goto: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	bci += off;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
-      case opc_goto_w:
-	bci += GET_JAVA_U4(code_base+bci+1);
+      }
+      case opc_goto_w: {
+	int off = GET_JAVA_U4(code_base+bci+1);
+	bci += off;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
+      }
 
       case opc_ifeq:
       case opc_ifne:
@@ -1774,12 +1384,14 @@
       case opc_ifgt:
       case opc_ifle:
       case opc_ifnull:
-      case opc_ifnonnull:
+      case opc_ifnonnull: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
 	stackdepth -= 1;
-	Thumb2_pass2(jinfo, stackdepth, bci + GET_JAVA_S2(code_base+bci+1));
-	check_zombie = 1;
+        Thumb2_pass1(jinfo, stackdepth, bci + off);
 	bci += 3;
 	break;
+      }
 
       case opc_if_icmpeq:
       case opc_if_icmpne:
@@ -1788,22 +1400,30 @@
       case opc_if_icmpgt:
       case opc_if_icmple:
       case opc_if_acmpeq:
-      case opc_if_acmpne:
+      case opc_if_acmpne: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
 	stackdepth -= 2;
-	Thumb2_pass2(jinfo, stackdepth, bci + GET_JAVA_S2(code_base+bci+1));
-	check_zombie = 1;
+        Thumb2_pass1(jinfo, stackdepth, bci + off);
 	bci += 3;
 	break;
-
-      case opc_jsr:
-	Thumb2_pass2(jinfo, stackdepth+1, bci + GET_JAVA_S2(code_base+bci+1));
+      }
+
+      case opc_jsr: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
 	bci += 3;
 	stackdepth = 0;
 	break;
-      case opc_jsr_w:
-	Thumb2_pass2(jinfo, stackdepth+1, bci + GET_JAVA_U4(code_base+bci+1));
+      }
+      case opc_jsr_w: {
+	int off = GET_JAVA_U4(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
 	bci += 5;
 	break;
+      }
 
       case opc_ireturn:
       case opc_lreturn:
@@ -1814,7 +1434,7 @@
       case opc_return_register_finalizer:
       case opc_ret:
       case opc_athrow:
-	// The test for BC_VISITED_P2 above will break out of the loop!!!
+	// The test for BC_VISITED above will break out of the loop!!!
 	break;
 
       case opc_tableswitch: {
@@ -1838,12 +1458,14 @@
 	  int off;
 	  w = *table++;
 	  off = (int)BYTESEX_REVERSE(w);
-	  Thumb2_pass2(jinfo, stackdepth, bci + off);
+	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	  Thumb2_pass1(jinfo, stackdepth, bci + off);
 	  low++;
 	}
 
-	check_zombie = 1;
 	bci += def;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
       }
 
@@ -1867,11 +1489,13 @@
 	  w = *table;
 	  table += 2;
 	  off = (int)BYTESEX_REVERSE(w);
-	  Thumb2_pass2(jinfo, stackdepth, bci + off);
+	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	  Thumb2_pass1(jinfo, stackdepth, bci + off);
 	}
 
-	check_zombie = 1;
 	bci += def;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
       }
 
@@ -1927,6 +1551,7 @@
 	Symbol *sig = pool->signature_ref_at(index);
 	const jbyte *base = sig->base();
 
+	jinfo->is_leaf = 0;
 	//tty->print("%d: %s: %s\n", opcode, name->as_C_string(), sig->as_C_string());
 	stackdepth += method_stackchange(base);
 	opcode = code_base[bci];
@@ -1942,7 +1567,7 @@
 	bci += 4;
 	break;
 
-      case opc_wide:
+      case opc_wide: {
 	opcode = code_base[bci+1];
 	if (opcode == opc_iinc) {
 	  bci += 6;
@@ -1962,6 +1587,7 @@
 	    fatal(err_msg("Undefined wide opcode %d\n", opcode));
 	}
 	break;
+      }
 
       default:
 	opcode = code_base[bci];
@@ -1971,6 +1597,50 @@
   }
 }
 
+void Thumb2_RegAlloc(Thumb2_Info *jinfo) // map physical registers (jregs->pregs) onto the highest-scoring locals
+{
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned i, j;
+  unsigned linfo;
+  unsigned score, max_score;
+  unsigned local = 0; // reassigned whenever a score > 0 is found; initialised so it is never read indeterminate
+  unsigned nlocals = jinfo->method->max_locals();
+  unsigned *pregs = jinfo->jregs->pregs;
+  unsigned npregs = jinfo->jregs->npregs;
+
+  for (i = 0; i < npregs; i++) jinfo->jregs->mapping[i] = -1; // -1: this register is not mapped to a local
+  for (i = 0; i < npregs; i++) {
+    if (jinfo->use_istate && pregs[i] == Ristate) continue; // Ristate is not handed out when use_istate is set
+    max_score = 0;
+    for (j = 0; j < nlocals; j++) {
+      linfo = locals_info[j];
+
+      if (linfo & ((1<<LOCAL_ALLOCATED)|(1<<LOCAL_DOUBLE))) continue; // skip locals already placed, and doubles
+      score = LOCAL_READS(linfo) + LOCAL_WRITES(linfo); // base score: counted reads plus writes
+      if (linfo & (1<<LOCAL_MODIFIED)) score = (score+1) >> 2; // modified locals weigh roughly a quarter
+      if (linfo & (1<<LOCAL_REF)) score = score - (score >> 2); // references weigh roughly three quarters
+      if (linfo & (1<<LOCAL_LONG)) score = (score+1) >> 2; // longs weigh roughly a quarter
+      if (score > max_score) max_score = score, local = j;
+    }
+    if (max_score < (OSPACE ? 8 : 2)) break; // best remaining candidate is below the threshold (8 under OSPACE, else 2)
+    locals_info[local] |= 1<<LOCAL_ALLOCATED;
+    jinfo->jregs->r_local[local] = pregs[i];
+    jinfo->jregs->mapping[i] = local;
+  }
+#ifdef T2_PRINT_REGUSAGE
+  if (t2_print_regusage) {
+    fprintf(stderr, "Regalloc: %u physical registers allocated as follows\n", npregs);
+    for (j = 0; j < nlocals; j++) {
+      unsigned r = jinfo->jregs->r_local[j];
+      if (r) {
+	unsigned typ = (locals_info[j] >> LOCAL_INT) & 0x1f;
+	fprintf(stderr, "  ARM Reg R%u -> local %u (type = %s)\n", r, j, local_types[LOG2(typ)]); // %u: r and j are unsigned
+      }
+    }
+  }
+#endif
+}
+
 //-------------------------------------------------------------------------------------
 
 #define Thumb2		1
@@ -2346,12 +2016,15 @@
   return codebuf->idx * 2;
 }
 
-#define CODE_ALIGN 64
-#define CODE_ALIGN_SIZE 64
-
 u32 out_align(CodeBuf *codebuf, unsigned align)
 {
-  codebuf->idx += (((out_pos(codebuf) + (align-1)) & ~(align-1)) - out_pos(codebuf)) / sizeof(short);
+  while ((out_pos(codebuf) & (align-1)) != 0) out_16(codebuf, 0); // pad with out_16(0) until the position is align-aligned; the mask form presumes align is a power of two
+  return out_pos(codebuf); // the (now aligned) output position
+}
+
+u32 out_align_offset(CodeBuf *codebuf, unsigned align, unsigned offset)
+{ // pads until (position & (align-1)) == offset, then returns the position
+  while ((out_pos(codebuf) & (align-1)) != offset) out_16(codebuf, 0); // NOTE(review): never terminates if offset is unreachable (e.g. offset >= align) -- confirm callers
   return out_pos(codebuf);
 }
 
@@ -2824,6 +2497,16 @@
 //  return dop_reg(codebuf, DP_MOV, dst, 0, src, SHIFT_LSL, 0);
 }
 
+int nop_16(CodeBuf *codebuf)
+{
+  return out_16(codebuf, T_MOV(ARM_R0, ARM_R0)); // no-op emitted as MOV R0, R0 through a single out_16
+}
+
+int nop_32(CodeBuf *codebuf)
+{
+  return dop_reg(codebuf, DP_MOV, ARM_R8, 0, ARM_R8, SHIFT_LSL, 0); // no-op emitted as MOV R8, R8 (LSL #0); presumably R8 selects the 32-bit encoding -- TODO confirm in dop_reg
+}
+
 int mvn_reg(CodeBuf *codebuf, u32 dst, u32 src)
 {
   if (dst < ARM_R8 && src < ARM_R8)
@@ -3037,7 +2720,7 @@
 
 int cmp_imm(CodeBuf *codebuf, Reg src, u32 imm)
 {
-  if (src <= ARM_R8 && imm < 256) return out_16(codebuf, T_CMP_IMM(src, imm));
+  if (src < ARM_R8 && imm < 256) return out_16(codebuf, T_CMP_IMM(src, imm)); // short form only for registers below R8 with imm < 256; all other cases fall through to dop_imm
   return dop_imm(codebuf, DP_CMP, 0x0f, src, imm);
 }
 
@@ -3046,36 +2729,16 @@
   return dop_imm(codebuf, DP_TST, 0x0f, src, imm);
 }
 
-int hbl(CodeBuf *codebuf, unsigned handler)
-{
-  mov_imm(codebuf, ARM_IP, 0);
-  str_imm(codebuf, ARM_IP, ARM_IP, 0, 1, 0);
-#if 0
-  if ((Thumb2 && ThumbEE))
-    return out_16(codebuf, T_HBL(handler));
-  if (TESTING)
-    return mov_imm(codebuf, ARM_R8, handler);
-  J_Unimplemented();
-#endif
-}
-
-#if 0
-int enter_leave(CodeBuf *codebuf, unsigned enter)
-{
-  if ((Thumb2 && ThumbEE))
-    return out_16x2(codebuf, T_ENTER_LEAVE(enter));
-  J_Unimplemented();
-}
-#endif
-
-int fullBarrier(CodeBuf *codebuf)
-{
-  return out_16x2(codebuf, T_DMB(0xf));
-}
-
-int storeBarrier(CodeBuf *codebuf)
-{
-  return out_16x2(codebuf, T_DMB(0xe));
+void fullBarrier(CodeBuf *codebuf)
+{
+  if (os::is_MP()) // emit nothing unless os::is_MP() reports a multiprocessor
+    out_16x2(codebuf, T_DMB(0xf)); // DMB with option field 0xf (SY per the ARM ARM -- confirm)
+}
+
+void storeBarrier(CodeBuf *codebuf)
+{
+  if (os::is_MP()) // emit nothing unless os::is_MP() reports a multiprocessor
+    out_16x2(codebuf, T_DMB(0xe)); // DMB with option field 0xe (ST per the ARM ARM -- confirm)
 }
 
 int tbh(CodeBuf *codebuf, Reg base, Reg idx)
@@ -3458,6 +3121,21 @@
 
 //-----------------------------------------------------------------------------------
 
+// An example of some debugging logic that you can use to trigger a
+// breakpoint when a particular method is executing.
+#define EQ(S1, S2) ((S1) && (strncmp((S1), (S2), strlen(S2)) == 0)) // true when S1 is non-null and begins with S2
+extern "C" void Debug(interpreterState istate)
+{
+  char valuebuf[8192];
+  istate->method()->name_and_sig_as_C_string(valuebuf, sizeof valuebuf); // fetch the method's name and signature into valuebuf
+  if (EQ(valuebuf, "java.util.Hashtable.get(Ljava/lang/Object;)")
+      // && istate->method()->bci_from(istate->bcp()) == 45
+      ) {
+    asm("nop"); // deliberate no-op: a place to set a debugger breakpoint
+  }
+}
+#undef EQ
+
 void Thumb2_Push_Multiple(CodeBuf *codebuf, Reg *regs, unsigned nregs)
 {
   unsigned regset = 0;
@@ -3510,58 +3188,6 @@
   ldm(codebuf, regset, Rstack, POP_FD, 1);
 }
 
-#if 0
-int load_multiple(CodeBuf *codebuf, Reg base, Reg *regs, u32 nregs, u32 st, u32 wb)
-{
-  unsigned regset = 0;
-  unsigned regmask;
-  unsigned pre = 0;
-  int dir = 1;
-  unsigned u;
-  Reg r;
-
-  if (st == IB || st == DB) pre = 4;
-  if (st == DA || st == DB) dir = -4;
-  JASSERT(nregs > 0, "nregs must be > 0");
-  if (nregs == 1)
-    return ldr_imm(codebuf, regs[0], base, dir, pre, wb);
-  if (dir > 0) {
-    u = 0;
-    do {
-      r = regs[u];
-      regmask = 1<<r;
-      if (regset != 0 && regmask >= regset) {
-	if (!wb && base != ARM_IP) {
-	  mov_reg(codebuf, ARM_IP, base);
-	  base = ARM_IP;
-	}
-	ldm(codebuf, regset, base, st, 1);
-	regset = 0;
-      }
-      regset |= regmask;
-    } while (++u < nregs);
-    ldm(codebuf, regset, base, st, wb);
-  } else {
-    u = nregs;
-    do {
-      u--;
-      r = regs[u];
-      regmask = 1<<r;
-      if (regmask <= (regset & -regset)) {
-	if (!wb && base != ARM_IP) {
-	  mov_reg(codebuf, ARM_IP, base);
-	  base = ARM_IP;
-	}
-	ldm(codebuf, regset, base, st, 1);
-	regset = 0;
-      }
-      regset |= regmask;
-    } while (u > 0);
-    ldm(codebuf, regset, base, st, wb);
-  }
-}
-#endif
-
 int mov_multiple(CodeBuf *codebuf, Reg *dst, Reg *src, unsigned nregs)
 {
   unsigned u, n, p;
@@ -4024,25 +3650,42 @@
 }
 
 #define LOCAL_OFFSET(local, stackdepth, nlocals) ((stackdepth)*4 + FRAME_SIZE + ((nlocals)-1-(local))*4)
+#define ISTATE_REG(jinfo)	  ((jinfo)->use_istate ? Ristate : Rstack) // base register: Ristate when use_istate is set, otherwise Rstack
+#define ISTATE(jinfo, stackdepth) ((jinfo)->use_istate ? 0 : (((stackdepth)-(jinfo)->jstack->depth)*4)) // byte adjustment to that base: 0 for Ristate, else (stackdepth - jstack->depth) words
+#define ISTATE_OFFSET(jinfo, stackdepth, offset) (ISTATE(jinfo, stackdepth) + (offset)) // ISTATE adjustment plus a caller-supplied byte offset
 
 void load_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
 {
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, r, Rlocals, -local * 4, 1, 0);
-#else
   int nlocals = jinfo->method->max_locals();
-  ldr_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);
-#endif
+  if (jinfo->use_istate) // emit a load of local slot `local` into r, addressed off whichever base register is in use
+    ldr_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4, 1, 0); // Ristate-relative: same as LOCAL_OFFSET without the stackdepth*4 term
+  else
+    ldr_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0); // Rstack-relative: offset includes stackdepth*4
 }
 
 void store_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
 {
-#ifdef USE_RLOCAL
-  str_imm(jinfo->codebuf, r, Rlocals, -local << 2, 1, 0);
-#else
   int nlocals = jinfo->method->max_locals();
-  str_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);
-#endif
+  if (jinfo->use_istate) // emit a store of r into local slot `local`, addressed off whichever base register is in use
+    str_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4, 1, 0); // Ristate-relative: same as LOCAL_OFFSET without the stackdepth*4 term
+  else
+    str_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0); // Rstack-relative: offset includes stackdepth*4
+}
+
+void load_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
+{ // emits a load into r of the word at istate_offset
+  if (jinfo->use_istate)
+    ldr_imm(jinfo->codebuf, r, Ristate, istate_offset, 1, 0); // Ristate-relative: offset used as given
+  else
+    ldr_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset), 1, 0); // Rstack-relative: offset adjusted by (stackdepth - jstack->depth)*4
+}
+
+void store_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
+{ // emits a store of r to the word at istate_offset
+  if (jinfo->use_istate)
+    str_imm(jinfo->codebuf, r, Ristate, istate_offset, 1, 0); // Ristate-relative: offset used as given
+  else
+    str_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset), 1, 0); // Rstack-relative: offset adjusted by (stackdepth - jstack->depth)*4
 }
 
 void Thumb2_Load(Thumb2_Info *jinfo, int local, unsigned stackdepth)
@@ -4059,7 +3702,6 @@
     Thumb2_Spill(jinfo, 1, 0);
     JASSERT(stackdepth >= jstack->depth, "negative stack offset?");
     stackdepth -= jstack->depth;
-    if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
     r = JSTACK_REG(jstack);
     PUSH(jstack, r);
     load_local(jinfo, r, local, stackdepth);
@@ -4081,7 +3723,6 @@
     } else {
       Thumb2_Spill(jinfo, 1, 0);
       stackdepth -= jstack->depth;
-      if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
       PUSH(jstack, r_hi);
       r_lo = PUSH(jstack, JSTACK_REG(jstack));
       load_local(jinfo, r_lo, local+1, stackdepth);
@@ -4091,14 +3732,12 @@
     if (r_lo) {
       Thumb2_Spill(jinfo, 1, 0);
       stackdepth -= jstack->depth;
-      if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
       r_hi = PUSH(jstack, JSTACK_REG(jstack));
       load_local(jinfo, r_hi, local, stackdepth);
       PUSH(jstack, r_lo);
     } else {
       Thumb2_Spill(jinfo, 2, 0);
       stackdepth -= jstack->depth;
-      if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
       r_hi = PUSH(jstack, JSTACK_REG(jstack));
       r_lo = PUSH(jstack, JSTACK_REG(jstack));
       load_local(jinfo, r_hi, local, stackdepth);
@@ -4115,7 +3754,6 @@
 
   Thumb2_Fill(jinfo, 1);
   stackdepth -= jstack->depth;
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
   r = POP(jstack);
   r_local = jinfo->jregs->r_local[local];
   if (r_local) {
@@ -4134,7 +3772,6 @@
   int nlocals = jinfo->method->max_locals();
 
   Thumb2_Fill(jinfo, 2);
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
   r_lo = POP(jstack);
   r_hi = POP(jstack);
   stackdepth -= 2;
@@ -4486,15 +4123,6 @@
   mov_imm(jinfo->codebuf, ARM_R1, bci);
   mov_imm(jinfo->codebuf, ARM_IP, 0);
   str_imm(jinfo->codebuf, ARM_IP, ARM_IP, 0, 1, 0);
-//  hbl(jinfo->codebuf, handler);
-}
-
-void Thumb2_Debug(Thumb2_Info *jinfo, unsigned handler)
-{
-#if 0
-  Thumb2_Flush(jinfo);
-  bl(jinfo->codebuf, handlers[handler]);
-#endif
 }
 
 void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start);
@@ -4541,7 +4169,7 @@
   // the safepoint test
   // abnormal case: read the polling page, trap to handler
   // which resets return address into the safepoint check code
-  // 
+  //
   // with a negative offset the generated code will look like
   //    movw r_tmp, #polling_page
   //    movt r_tmp, #polling_page
@@ -4572,7 +4200,7 @@
   //    >
   // L1:
   //    <caller plants branch/return here>
-  // 
+  //
   //  n.b. for a return there is no need save or restore locals
 
   int r_tmp = Thumb2_Tmp(jinfo, 0);
@@ -4617,6 +4245,8 @@
     Thumb2_save_locals(jinfo, stackdepth);
     //}
   mov_imm(jinfo->codebuf, ARM_R1, bci+CONSTMETHOD_CODEOFFSET);
+  add_imm(jinfo->codebuf, ARM_R2, ISTATE_REG(jinfo),
+	  ISTATE_OFFSET(jinfo, stackdepth, 0));
   bl(jinfo->codebuf, handlers[H_SAFEPOINT]);
   //if (offset != 0) {
     Thumb2_restore_locals(jinfo, stackdepth);
@@ -4662,13 +4292,14 @@
     return -1;
 }
 
-int Thumb2_Goto(Thumb2_Info *jinfo, unsigned bci, int offset, int len, int stackdepth)
+int Thumb2_Goto(Thumb2_Info *jinfo, unsigned bci, int offset, int len, int stackdepth = -1)
 {
     unsigned dest_taken = bci + offset;
     unsigned dest_not_taken = bci + len;
     unsigned loc;
 
-    if (jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
+    if (stackdepth >= 0
+	&& jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
       // n.b. the backwards branch will be planted by the safepoint routine
       Thumb2_Safepoint(jinfo, stackdepth, bci, offset);
       return dest_not_taken;
@@ -4680,6 +4311,80 @@
     return -1;
 }
 
+void Thumb2_save_locals(Thumb2_Info *jinfo, unsigned stackdepth)
+{ // writes register-held locals that are both references and modified back to their slots
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty"); // precondition: jstack must hold no entries
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) { // non-zero entry: local i has a register assigned
+      if ((locals_info[i] & (1 << LOCAL_REF)) && (locals_info[i] & (1 << LOCAL_MODIFIED))) {
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_restore_locals(Thumb2_Info *jinfo, unsigned stackdepth)
+{ // reloads every register-held reference local from its slot, modified or not
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty"); // precondition: jstack must hold no entries
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) { // non-zero entry: local i has a register assigned
+      if (locals_info[i] & (1<<LOCAL_REF)) {
+	load_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_invoke_save(Thumb2_Info *jinfo, unsigned stackdepth)
+{ // writes every modified register-held local back to its slot (references or not)
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty"); // precondition: jstack must hold no entries
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) { // non-zero entry: local i has a register assigned
+      if (locals_info[i] & (1 << LOCAL_MODIFIED)) {
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_invoke_restore(Thumb2_Info *jinfo, unsigned stackdepth)
+{ // reloads every register-held local from its slot, unconditionally
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info; // not read in this function
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty"); // precondition: jstack must hold no entries
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) { // non-zero entry: local i has a register assigned
+	load_local(jinfo, r, i, stackdepth);
+    }
+  }
+}
+
+void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth)
+{ // emits a call to handlers[handler] with R0 = bci + CONSTMETHOD_CODEOFFSET
+    Thumb2_Flush(jinfo); // runs before Thumb2_invoke_save, which asserts jstack->depth == 0
+    Thumb2_invoke_save(jinfo, stackdepth); // write modified register-held locals back first
+    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+    bl(jinfo->codebuf, handlers[handler]);
+}
+
 void Thumb2_Return(Thumb2_Info *jinfo, unsigned opcode, int bci, int stackdepth)
 {
   Thumb2_Safepoint(jinfo, stackdepth, bci, 0);
@@ -4687,13 +4392,8 @@
   Reg r_lo, r;
   Thumb2_Stack *jstack = jinfo->jstack;
 
-  if (0 /*jinfo->compiled_return*/) {
-    unsigned bci = jinfo->compiled_return;
-
-    JASSERT(jinfo->bc_stackinfo[bci] & BC_COMPILED, "return not compiled");
-    JASSERT(jinfo->code_base[bci] == opcode, "type of return changed");
-    branch_uncond(jinfo->codebuf, jinfo->bc_stackinfo[bci] & ~BC_FLAGS_MASK);
-    return;
+  if (jinfo->method->has_monitor_bytecodes()) {
+    Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
   }
 
   if (jinfo->method->is_synchronized()) {
@@ -4703,7 +4403,7 @@
 //    Thumb2_save_locals(jinfo);
     // Free the monitor
     //
-    // 		sub	r1, Ristate, #8
+    // 		add	r1, #<stackdepth>-8
     // 		ldr	r2, [r1, #4]
     //		cbz	r2, throw_illegal_monitor_state
     //		ldr	r0, [r1, #0]
@@ -4724,7 +4424,8 @@
     //
     // JAZ_V1 == tmp2
     // JAZ_V2 == tmp1
-    sub_imm(jinfo->codebuf, ARM_R1, Ristate, frame::interpreter_frame_monitor_size()*wordSize);
+    add_imm(jinfo->codebuf, ARM_R1, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) - frame::interpreter_frame_monitor_size()*wordSize);
+
     ldr_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4, 1, 0);
     loc_illegal_monitor_state = forward_16(jinfo->codebuf);
     ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0, 1, 0);
@@ -4750,7 +4451,15 @@
     cbz_patch(jinfo->codebuf, ARM_R3, loc_success2);
   }
 
-  if (opcode != opc_return) {
+  if (opcode == opc_return) {
+    if (jinfo->compiled_return) {
+      unsigned ret_idx = jinfo->compiled_return;
+
+      branch_uncond(jinfo->codebuf, ret_idx);
+      return;
+    }
+    if (OSPACE) jinfo->compiled_return = jinfo->codebuf->idx * 2;
+  } else {
     if (opcode == opc_lreturn || opcode == opc_dreturn) {
       Thumb2_Fill(jinfo, 2);
       r_lo = POP(jstack);
@@ -4758,6 +4467,13 @@
     } else {
       Thumb2_Fill(jinfo, 1);
       r = POP(jstack);
+      if (jinfo->compiled_word_return[r]) {
+        unsigned ret_idx = jinfo->compiled_word_return[r];
+
+        branch_uncond(jinfo->codebuf, ret_idx);
+        return;
+      }
+      if (OSPACE) jinfo->compiled_word_return[r] = jinfo->codebuf->idx * 2;
     }
   }
 
@@ -4778,11 +4494,8 @@
     }
   }
 
-//  sub_imm(jinfo->codebuf, Ristate, ARM_LR, ISTATE_NEXT_FRAME);
   str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
   str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-  Thumb2_Debug(jinfo, H_DEBUG_METHODEXIT);
-//  enter_leave(jinfo->codebuf, 0);
 
   // deoptimized_frames = 0
   // FIXME: This should be done in the slow entry, but only three
@@ -4792,113 +4505,12 @@
   ldm(jinfo->codebuf, C_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
 }
 
-#if 0
-void Thumb2_save_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-	store_local(jinfo, r, i, stackdepth);
-    }
-  }
-}
-#endif
-
-void Thumb2_save_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if ((locals_info[i] & (1 << LOCAL_REF)) && (locals_info[i] & (1 << LOCAL_MODIFIED))) {
-	store_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-void Thumb2_restore_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if (locals_info[i] & (1<<LOCAL_REF)) {
-	load_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-void Thumb2_invoke_save(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if (locals_info[i] & (1 << LOCAL_MODIFIED)) {
-	store_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-void Thumb2_invoke_restore(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-	load_local(jinfo, r, i, stackdepth);
-    }
-  }
-}
-
-void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth)
-{
-    Thumb2_Flush(jinfo);
-    Thumb2_invoke_save(jinfo, stackdepth);
-    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-    bl(jinfo->codebuf, handlers[handler]);
-}
-
-void Thumb2_Jsr(Thumb2_Info *jinfo, unsigned bci, unsigned stackdepth)
-{
-      Thumb2_Exit(jinfo, H_JSR, bci, stackdepth);
-}
-
 int Thumb2_Accessor(Thumb2_Info *jinfo)
 {
   jubyte *code_base = jinfo->code_base;
   constantPoolCacheOop  cp = jinfo->method->constants()->cache();
   ConstantPoolCacheEntry* cache;
   int index = GET_NATIVE_U2(code_base+2);
-  unsigned loc;
   unsigned *bc_stackinfo = jinfo->bc_stackinfo;
 
   JASSERT(code_base[0] == opc_aload_0 || code_base[0] == opc_iaccess_0, "not an aload_0 in accessor");
@@ -4909,28 +4521,28 @@
   TosState tos_type = cache->flag_state();
   int field_offset = cache->f2();
 
-  // Slow entry point
-  loc = forward_32(jinfo->codebuf);
-  out_32(jinfo->codebuf, 0);
-  out_32(jinfo->codebuf, 0);
+  // Slow entry point - callee save
+  // R0 = method
+  // R2 = thread
+  stm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + FAST_ENTRY_OFFSET - 6);
+  ldm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_16(jinfo->codebuf, 0);
 
   out_32(jinfo->codebuf, 0);	// pointer to osr table
   out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
   out_32(jinfo->codebuf, 0);	// next compiled method
 
-  out_32(jinfo->codebuf, 0);    // regusage
-  out_32(jinfo->codebuf, 0);
-  out_32(jinfo->codebuf, 0);
-
-  // OSR entry point
-  mov_reg(jinfo->codebuf, ARM_PC, ARM_R0);
+  out_32(jinfo->codebuf, -1);    // regusage
+  out_32(jinfo->codebuf, -1);
+  out_32(jinfo->codebuf, -1);
 
   out_align(jinfo->codebuf, CODE_ALIGN);
 
   // fast entry point
   bc_stackinfo[0] = (bc_stackinfo[0] & BC_FLAGS_MASK) | (jinfo->codebuf->idx * 2) | BC_COMPILED;
-  branch_uncond_patch(jinfo->codebuf, loc, jinfo->codebuf->idx * 2);
-  ldr_imm(jinfo->codebuf, ARM_R1, ARM_R2, THREAD_JAVA_SP, 1, 0);
+  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_JAVA_SP, 1, 0);
   ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0, 1, 0);
   if (tos_type == btos)
     ldrsb_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset, 1, 0);
@@ -4952,19 +4564,26 @@
   return 1;
 }
 
+#define STACKDEPTH(jinfo, stackinfo) (((stackinfo) & ~BC_FLAGS_MASK) + \
+	((jinfo)->method->is_synchronized() ? frame::interpreter_frame_monitor_size() : 0))
+
+
 void Thumb2_Enter(Thumb2_Info *jinfo)
 {
   int parms = jinfo->method->size_of_parameters();
   int extra_locals = jinfo->method->max_locals() - parms;
   unsigned *locals_info = jinfo->locals_info;
   int i;
+  unsigned stackdepth = 0;
 
   // Slow entry point - callee save
   // R0 = method
   // R2 = thread
   stm(jinfo->codebuf, I_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + CODE_ALIGN - 4);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + FAST_ENTRY_OFFSET - 6);
   ldm(jinfo->codebuf, I_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_16(jinfo->codebuf, 0);
 
   out_32(jinfo->codebuf, 0);	// Space for osr_table pointer
   out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
@@ -4974,43 +4593,20 @@
   out_32(jinfo->codebuf, 0);
   out_32(jinfo->codebuf, 0);
 
-  // OSR entry point == Slow entry + 16 - caller save
-  // R0 = entry point within compiled method
-  // R1 = locals - THUMB2_MAXLOCALS * 4
-  // R2 = thread
-  // R3 = locals - 31 * 4
-  {
-    int nlocals = jinfo->method->max_locals();
-
-    for (i = 0; i < nlocals; i++) {
-      Reg r = jinfo->jregs->r_local[i];
-      if (r) {
-	ldr_imm(jinfo->codebuf, r,
-		(i < 32) ? ARM_R3 : ARM_R1,
-		(i < 32) ? (31 - i) * 4 : (THUMB2_MAXLOCALS - i) * 4,
-	  	1, 0);
-      }
-    }
-    mov_reg(jinfo->codebuf, Rthread, ARM_R2);
-    mov_reg(jinfo->codebuf, ARM_PC, ARM_R0);
-  }
-
   out_align(jinfo->codebuf, CODE_ALIGN);
 
   // Fast entry point == Slow entry + 64 - caller save
   // R0 = method
   // R2 = thread
   stm(jinfo->codebuf, C_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-//  enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rstack, ARM_R2, THREAD_JAVA_SP, 1, 0);
-  Thumb2_Debug(jinfo, H_DEBUG_METHODENTRY);
+  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
   {
     unsigned stacksize;
 
     stacksize = (extra_locals + jinfo->method->max_stack()) * sizeof(int);
     stacksize += FRAME_SIZE + STACK_SPARE;
     if (!jinfo->is_leaf || stacksize > LEAF_STACK_SIZE) {
-      ldr_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_JAVA_STACK_BASE, 1, 0);
+      ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_STACK_BASE, 1, 0);
       sub_imm(jinfo->codebuf, ARM_R1, Rstack, stacksize + LEAF_STACK_SIZE);
       cmp_reg(jinfo->codebuf, ARM_R3, ARM_R1);
       it(jinfo->codebuf, COND_CS, IT_MASK_T);
@@ -5031,13 +4627,20 @@
 
   ldr_imm(jinfo->codebuf, ARM_IP, ARM_R0, METHOD_CONSTANTS, 1, 0);
 
-  sub_imm(jinfo->codebuf, Ristate, Rstack, FRAME_SIZE);
-
   add_imm(jinfo->codebuf, Rlocals, Rstack, (jinfo->method->max_locals()-1) * sizeof(int));
-  str_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
+
+  sub_imm(jinfo->codebuf, Rstack, Rstack, FRAME_SIZE);
+
+  if (jinfo->use_istate) mov_reg(jinfo->codebuf, Ristate, Rstack);
+  store_istate(jinfo, Rstack, ISTATE_SELF_LINK, stackdepth);
+
+  store_istate(jinfo, Rstack, ISTATE_MONITOR_BASE, stackdepth);
+
+  store_istate(jinfo, Rlocals, ISTATE_LOCALS, stackdepth);
 
   if (jinfo->method->is_synchronized()) {
-    sub_imm(jinfo->codebuf, Rstack, Ristate, frame::interpreter_frame_monitor_size()*wordSize);
+    sub_imm(jinfo->codebuf, Rstack, Rstack, frame::interpreter_frame_monitor_size()*wordSize);
+    stackdepth = frame::interpreter_frame_monitor_size();
     if (jinfo->method->is_static()) {
       ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_POOL_HOLDER, 1, 0);
       ldr_imm(jinfo->codebuf, JAZ_V1, ARM_R3, KLASS_PART+KLASS_JAVA_MIRROR, 1, 0);
@@ -5045,46 +4648,39 @@
       ldr_imm(jinfo->codebuf, JAZ_V1, Rlocals, 0, 1, 0);
     }
     str_imm(jinfo->codebuf, JAZ_V1, Rstack, 4, 1, 0);
-  } else
-    mov_reg(jinfo->codebuf, Rstack, Ristate);
-
-  str_imm(jinfo->codebuf, ARM_R1, Ristate, ISTATE_MSG, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R1, Ristate, ISTATE_OOP_TEMP, 1, 0);
+  }
+
+  store_istate(jinfo, ARM_R1, ISTATE_MSG, stackdepth);
+  store_istate(jinfo, ARM_R1, ISTATE_OOP_TEMP, stackdepth);
 
   sub_imm(jinfo->codebuf, ARM_R3, Rstack, jinfo->method->max_stack() * sizeof(int));
-  str_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_JAVA_SP, 1, 0);
-
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK_BASE, 1, 0);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP, 1, 0);
+
+  store_istate(jinfo, Rstack, ISTATE_STACK_BASE, stackdepth);
 
   sub_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_STACK_LIMIT, 1, 0);
-
-  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_TOP_ZERO_FRAME, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_NEXT_FRAME, 1, 0);
+  store_istate(jinfo, ARM_R3, ISTATE_STACK_LIMIT, stackdepth);
+
+  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  store_istate(jinfo, ARM_R3, ISTATE_NEXT_FRAME, stackdepth);
 
   mov_imm(jinfo->codebuf, ARM_R3, INTERPRETER_FRAME);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_FRAME_TYPE, 1, 0);
-
-  str_imm(jinfo->codebuf, Ristate, Ristate, ISTATE_MONITOR_BASE, 1, 0);
+  store_istate(jinfo, ARM_R3, ISTATE_FRAME_TYPE, stackdepth);
 
   mov_imm(jinfo->codebuf, ARM_R1, 0);   // set last SP to zero before
                                         // setting FP
-  str_imm(jinfo->codebuf, ARM_R1, ARM_R2, THREAD_LAST_JAVA_SP, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_NEXT_FRAME);
-  str_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_TOP_ZERO_FRAME, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_LAST_JAVA_FP, 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_JAVA_SP, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_LAST_JAVA_SP, 1, 0);
+  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
+  add_imm(jinfo->codebuf, ARM_R3, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) + ISTATE_NEXT_FRAME);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
+  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP, 1, 0);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
 
   ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_CACHE, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_CONSTANTS, 1, 0);
-
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_THREAD, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
-
-  str_imm(jinfo->codebuf, Ristate, Ristate, ISTATE_SELF_LINK, 1, 0);
-
-  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  store_istate(jinfo, ARM_R3, ISTATE_CONSTANTS, stackdepth);
+
+  store_istate(jinfo, Rthread, ISTATE_THREAD, stackdepth);
+  store_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
 
   if (jinfo->method->is_synchronized()) {
     unsigned loc_retry, loc_failed, loc_success, loc_exception;
@@ -5093,14 +4689,13 @@
     //
     // Try to acquire the monitor. Seems very sub-optimal
     // 		ldr	r3, [JAZ_V1, #0]
-    // 		sub	r1, Ristate, #8
     // 		orr	r3, r3, #1
-    // 		str	r3, [r1, #0]
+    // 		str	r3, [Rstack, #0]
     // 	retry:
     // 		ldrex	r0, [JAZ_V1, #0]
     // 		cmp	r3, r0
     // 		bne	failed
-    // 		strex	r0, r1, [JAZ_V1, #0]
+    // 		strex	r0, Rstack, [JAZ_V1, #0]
     // 		cbz	r0, success
     // 		b	retry
     // 	failed:
@@ -5109,15 +4704,14 @@
     // 		<success - acquired the monitor>
     //
     ldr_imm(jinfo->codebuf, ARM_R3, JAZ_V1, 0, 1, 0);
-    sub_imm(jinfo->codebuf, ARM_R1, Ristate, frame::interpreter_frame_monitor_size()*wordSize);
     orr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 1);
-    str_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
+    str_imm(jinfo->codebuf, ARM_R3, Rstack, 0, 1, 0);
     loc_retry = out_loc(jinfo->codebuf);
 // retry:
     ldrex_imm(jinfo->codebuf, ARM_R0, JAZ_V1, 0);
     cmp_reg(jinfo->codebuf, ARM_R3, ARM_R0);
     loc_failed = forward_16(jinfo->codebuf);
-    strex_imm(jinfo->codebuf, ARM_R0, ARM_R1, JAZ_V1, 0);
+    strex_imm(jinfo->codebuf, ARM_R0, Rstack, JAZ_V1, 0);
     loc_success = forward_16(jinfo->codebuf);
     branch_uncond(jinfo->codebuf, loc_retry);
     bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
@@ -5128,8 +4722,6 @@
     bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
     cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
     cbz_patch(jinfo->codebuf, ARM_R0, loc_success);
-//    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
-//    bl(jinfo->codebuf, handlers[H_MONITOR]);
 // success:
 
   }
@@ -5140,11 +4732,11 @@
     for (i = 0; i < nlocals; i++) {
       Reg r = jinfo->jregs->r_local[i];
       if (r) {
-	unsigned stackdepth = 0;
-	if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-	if (i < parms || (locals_info[i] & (1<<LOCAL_REF))) {
+	unsigned stackdepth = STACKDEPTH(jinfo, 0);
+        if (i < parms)
 	  load_local(jinfo, r, i, stackdepth);
-	}
+        else if (locals_info[i] & (1<<LOCAL_REF))
+          mov_reg(jinfo->codebuf, r, ARM_R1);
       }
     }
   }
@@ -5176,14 +4768,7 @@
   H_D2F,
 };
 
-// Generate code for a load of a jlong.  If the operand is volatile,
-// generate a sequence of the form
-//
-// .Lsrc:
-// 	ldrexd r0, r1 , [src]
-// 	strexd r2 , r0, r1, [src]
-// 	cmp    r2, #0
-// 	bne    .Lsrc
+// Generate code for a load of a jlong.
 
 void Thumb2_load_long(Thumb2_Info *jinfo, Reg r_lo, Reg r_hi, Reg base,
 		      int field_offset,
@@ -5192,22 +4777,17 @@
   CodeBuf *codebuf = jinfo->codebuf;
   if (is_volatile) {
     Reg r_addr = base;
-    Reg tmp = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
     if (field_offset) {
-      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base) | (1<<tmp));
+      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
       add_imm(jinfo->codebuf, r_addr, base, field_offset);
     }
-    int loc = out_loc(codebuf);
     ldrexd(codebuf, r_lo, r_hi, r_addr);
-    strexd(codebuf, tmp, r_lo, r_hi, r_addr);
-    cmp_imm(codebuf, tmp, 0);
-    branch(codebuf, COND_NE, loc);
   } else {
     ldrd_imm(codebuf, r_lo, r_hi, base, field_offset, 1, 0);
   }
 }
 
-// Generate code for a load of a jlong.  If the operand is volatile,
+// Generate code for a store of a jlong.  If the operand is volatile,
 // generate a sequence of the form
 //
 // .Ldst
@@ -5459,16 +5039,26 @@
   unsigned stackdepth;
 
   for (bci = start; bci < code_size; ) {
-#ifdef T2EE_PRINT_DISASS
-    unsigned start_idx = jinfo->codebuf->idx;
+    opcode = code_base[bci];
+    stackinfo = bc_stackinfo[bci];
+#ifdef T2_PRINT_DISASS
+    unsigned start_idx;
+#endif
+
+    if (stackinfo & BC_BRANCH_TARGET) Thumb2_Flush(jinfo);
+
+    if (!OSPACE && (stackinfo & BC_BACK_TARGET)) {
+      if (out_pos(codebuf) & 0x02) nop_16(codebuf);
+      if (out_pos(codebuf) & 0x04) nop_32(codebuf);
+    }
+
+#ifdef T2_PRINT_DISASS
+    start_idx = jinfo->codebuf->idx;
     if (start_bci[start_idx] == -1) start_bci[start_idx] = bci;
 #endif
-    opcode = code_base[bci];
-    stackinfo = bc_stackinfo[bci];
-
-    if (stackinfo & BC_BRANCH_TARGET) Thumb2_Flush(jinfo);
+
     JASSERT(!(stackinfo & BC_COMPILED), "code already compiled for this bytecode?");
-    stackdepth = stackinfo & ~BC_FLAGS_MASK;
+    stackdepth = STACKDEPTH(jinfo, stackinfo); // Stackdepth here is adjusted for monitors
     bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2) | BC_COMPILED;
 
     if (opcode > OPC_LAST_JAVA_OP)
@@ -5491,13 +5081,13 @@
 	      (address)(code_base+bci), (address)(code_base+code_size)));
     }
 
-    if (IS_DEAD(stackinfo) || IS_ZOMBIE(stackinfo)) {
+    if (IS_DEAD(stackinfo)) {
       unsigned zlen = 0;
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       unsigned start_bci = bci;
 #endif
 
-      Thumb2_Exit(jinfo, H_ZOMBIE, bci, stackdepth);
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
       do {
 	zlen += len;
 	bci += len;
@@ -5506,7 +5096,7 @@
 	stackinfo = bc_stackinfo[bci];
 
 	if (stackinfo & BC_BRANCH_TARGET) break;
-	if (!(IS_DEAD(stackinfo) || IS_ZOMBIE(stackinfo))) break;
+	if (!IS_DEAD(stackinfo)) break;
 
 	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
 
@@ -5524,21 +5114,20 @@
 	}
 
       } while (1);
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       end_bci[start_idx] = start_bci + zlen;
 #endif
-      jinfo->zombie_bytes += zlen;
       continue;
     }
 
 #if 0
-    if (bci >= 2620) {
+    if (bci >= 4) {
       unsigned zlen = 0;
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       unsigned start_bci = bci;
 #endif
 
-      Thumb2_Exit(jinfo, H_ZOMBIE, bci, stackdepth);
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
       do {
 	zlen += len;
 	bci += len;
@@ -5548,6 +5137,8 @@
 
 	if (stackinfo & BC_BRANCH_TARGET) break;
 
+	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
+
 	if (opcode > OPC_LAST_JAVA_OP) {
 	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
 	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
@@ -5562,15 +5153,14 @@
 	}
 
       } while (1);
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       end_bci[start_idx] = start_bci + zlen;
 #endif
-      jinfo->zombie_bytes += zlen;
       continue;
     }
 #endif
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
     end_bci[start_idx] = bci + len;
 #endif
 
@@ -5637,16 +5227,6 @@
 	    v = (unsigned)constants->int_at(index);
 	    len += Thumb2_Imm(jinfo, v, bci+len);
 	    break;
-#if 0
-	  case JVM_CONSTANT_String:
-	    v = (unsigned)constants->resolved_string_at(index);
-	    len += Thumb2_Imm(jinfo, v, bci+len);
-	    break;
-	  case JVM_CONSTANT_Class:
-	    v = (unsigned)constants->resolved_klass_at(index)->klass_part()->java_mirror();
-	    len += Thumb2_Imm(jinfo, v, bci+len);
-	    break;
-#endif
 	  case JVM_CONSTANT_Long:
 	  case JVM_CONSTANT_Double: {
 	    unsigned long long v;
@@ -5660,7 +5240,7 @@
 	    Thumb2_Spill(jinfo, 1, 0);
 	    r = JSTACK_REG(jstack);
 	    PUSH(jstack, r);
-	    ldr_imm(jinfo->codebuf, r, Ristate, ISTATE_METHOD, 1, 0);
+	    load_istate(jinfo, r, ISTATE_METHOD, stackdepth+1);
 	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTANTS, 1, 0);
 	    ldr_imm(jinfo->codebuf, r, r, CONSTANTPOOL_BASE + (index << 2), 1, 0);
 	    if (v == JVM_CONSTANT_Class)
@@ -5674,8 +5254,8 @@
 	    Thumb2_Flush(jinfo);
 	    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  Thumb2_save_locals(jinfo, stackdepth);
-	    mov_imm(jinfo->codebuf, ARM_R1, opcode != opc_ldc);
-	    bl(jinfo->codebuf, handlers[H_LDC]);
+	    // No "wide" flag is passed in R1: ldc and ldc_w dispatch to separate handlers below.
+	    bl(jinfo->codebuf, handlers[opcode == opc_ldc ? H_LDC : H_LDC_W]);
 	  Thumb2_restore_locals(jinfo, stackdepth);
 	    ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT, 1, 0);
 	    mov_imm(jinfo->codebuf, ARM_R2, 0);
@@ -5833,7 +5413,6 @@
 	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
 	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
 	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
-	and_imm(jinfo->codebuf, shift, shift, 63);
 	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
 	tst_imm(jinfo->codebuf, shift, 32);
 	loc1 = forward_16(jinfo->codebuf);
@@ -5863,7 +5442,6 @@
 	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
 	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
 	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
-	and_imm(jinfo->codebuf, shift, shift, 63);
 	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
 	tst_imm(jinfo->codebuf, shift, 32);
 	loc1 = forward_16(jinfo->codebuf);
@@ -5893,7 +5471,6 @@
 	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
 	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
 	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
-	and_imm(jinfo->codebuf, shift, shift, 63);
 	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
 	tst_imm(jinfo->codebuf, shift, 32);
 	loc1 = forward_16(jinfo->codebuf);
@@ -6035,14 +5612,10 @@
 	  mov_reg(jinfo->codebuf, ARM_R0, r_lho);
 	  mov_reg(jinfo->codebuf, ARM_R1, r_rho);
 	}
-#if 1
 	if (opcode == opc_frem)
 	  bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
 	else
 	  blx(jinfo->codebuf, OPCODE2HANDLER(opcode));
-#else
-	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
-#endif
 	PUSH(jstack, ARM_R0);
 	break;
       }
@@ -6222,7 +5795,6 @@
 	  int nlocals = jinfo->method->max_locals();
 	  r = Thumb2_Tmp(jinfo, 0);
 	  stackdepth -= jstack->depth;
-	  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
 	  load_local(jinfo, r, local, stackdepth);
 	  add_imm(jinfo->codebuf, r, r, constant);
 	  store_local(jinfo, r, local, stackdepth);
@@ -6257,7 +5829,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
@@ -6297,11 +5869,6 @@
 	break;
       }
 
-      case opc_monitorexit:
-      case opc_monitorenter:
-	  Thumb2_Exit(jinfo, H_MONITOR, bci, stackdepth);
-	  break;
-
       case opc_getstatic: {
 	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
         ConstantPoolCacheEntry* cache;
@@ -6325,7 +5892,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
@@ -6338,7 +5905,7 @@
 	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
 	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
 	  r_addr = Thumb2_Tmp(jinfo, (1<<r_hi) | (1<<r_lo));
-	  ldr_imm(jinfo->codebuf, r_lo, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r_lo, ISTATE_CONSTANTS, stackdepth+2);
 	  ldr_imm(jinfo->codebuf, r_addr, r_lo, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  Thumb2_load_long(jinfo, r_lo, r_hi, r_addr, field_offset,
 			   cache->is_volatile());
@@ -6347,7 +5914,7 @@
 	  Thumb2_Spill(jinfo, 1, 0);
 	  r = JSTACK_REG(jstack);
 	  PUSH(jstack, r);
-	  ldr_imm(jinfo->codebuf, r, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r, ISTATE_CONSTANTS, stackdepth+1);
 	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  if (tos_type == btos)
 	    ldrsb_imm(jinfo->codebuf, r, r, field_offset, 1, 0);
@@ -6390,7 +5957,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 
 	  break;
 	}
@@ -6456,7 +6023,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
@@ -6475,7 +6042,7 @@
 	  Thumb2_Spill(jinfo, 1, (1<<r_lo)|(1<<r_hi));
 	  r_obj = JSTACK_PREFER(jstack, ~((1<<r_lo)|(1<<r_hi)));
 	  JASSERT(r_obj != r_lo && r_obj != r_hi, "corruption in putstatic");
-	  ldr_imm(jinfo->codebuf, r_obj, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-2);
 	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  Thumb2_store_long(jinfo, r_lo, r_hi, r_obj, field_offset, cache->is_volatile());
 	} else {
@@ -6485,7 +6052,7 @@
 	  Thumb2_Spill(jinfo, 1, (1<<r));
 	  r_obj = JSTACK_PREFER(jstack, ~(1<<r));
 	  JASSERT(r_obj != r, "corruption in putstatic");
-	  ldr_imm(jinfo->codebuf, r_obj, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-1);
 	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  if (tos_type == btos)
 	    strb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
@@ -6507,6 +6074,7 @@
 	break;
       }
 
+      case opc_invokevirtual:
       case opc_invokestatic:
       case opc_invokespecial: {
 	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
@@ -6515,6 +6083,21 @@
 	unsigned loc;
 	methodOop callee;
 
+	// If DebugSwitch is set and the method containing this invoke is synchronized, emit a call to Debug first.
+#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
+	if (DebugSwitch && jinfo->method->is_synchronized()) {
+	  stm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);	// save R0-R3, IP and LR around the call
+	  add_imm(jinfo->codebuf, ARM_R0, ISTATE_REG(jinfo), ISTATE_OFFSET(jinfo, stackdepth, 0));	// R0 = istate reg + offset; presumably the istate pointer argument - TODO confirm
+	  mov_imm(jinfo->codebuf, ARM_IP, (u32)Debug);	// IP = address of Debug
+	  load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);	// R2 = method from the frame
+	  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);	// R2 = method's constMethod
+	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);	// R2 = constMethod + code offset + bci
+	  store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);	// record that value as the frame's bcp before calling out
+	  blx_reg(jinfo->codebuf, ARM_IP);	// call Debug
+	  ldm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, POP_FD, 1);	// restore the registers saved above
+	}
+#undef DEBUG_REGSET
+
         cache = cp->entry_at(index);
         if (!cache->is_resolved((Bytecodes::Code)opcode)) {
 	  Thumb2_Flush(jinfo);
@@ -6522,17 +6105,18 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf,
-	    handlers[opcode == opc_invokestatic ? H_INVOKESTATIC : H_INVOKESPECIAL]);
-	  Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	    handlers[opcode == opc_invokestatic ? H_INVOKESTATIC :
+		     opcode == opc_invokespecial ? H_INVOKESPECIAL : H_INVOKEVIRTUAL]);
+	  Thumb2_invoke_restore(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
-	callee = (methodOop)cache->f1();
-
-	if (handle_special_method(callee, jinfo, stackdepth))
-	  break;
-
-	if (callee->is_accessor()) {
+	callee = opcode == opc_invokevirtual ? (methodOop)cache->f2() : (methodOop)cache->f1();
+	if ((opcode != opc_invokevirtual || cache->is_vfinal()) && callee->is_accessor()) {
+
+	  if (handle_special_method(callee, jinfo, stackdepth))
+	    break;
+
 	  u1 *code = callee->code_base();
 	  int index = GET_NATIVE_U2(&code[2]);
 	  constantPoolCacheOop callee_cache = callee->constants()->cache();
@@ -6540,11 +6124,6 @@
 	  Reg r_obj, r;
 
 	  if (entry->is_resolved(Bytecodes::_getfield)) {
-#if 0
-	    tty->print("Inlining accessor (opcode = %s) ", opcode == opc_invokestatic ? "invokestatic" : "invokespecial");
-	    callee->print_short_name(tty);
-	    tty->print("\n");
-#endif
 	    JASSERT(cache->parameter_size() == 1, "not 1 parameter to accessor");
 
 	    TosState tos_type = entry->flag_state();
@@ -6558,7 +6137,7 @@
 	    r = JSTACK_REG(jstack);
 	    PUSH(jstack, r);
 	    if (tos_type == btos)
-	      ldrsb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
+	      ldrsb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);	// byte fields are signed: keep the sign-extending load
 	    else if (tos_type == ctos)
 	      ldrh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
 	    else if (tos_type == stos)
@@ -6569,44 +6148,56 @@
 	  }
 	}
 
-	Thumb2_Flush(jinfo);
-  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_CONSTANTS, 1, 0);
-	mov_imm(jinfo->codebuf, ARM_R1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	if (opcode == opc_invokespecial)
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, CP_OFFSET + (index << 4) + 4, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
-	if (opcode == opc_invokespecial)
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 0, 1, 0); // Null pointer check - cbz better?
-	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
+ 	Thumb2_Flush(jinfo);
+	if (OSPACE) {
+	  Thumb2_invoke_save(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[
+	      opcode == opc_invokestatic ? H_INVOKESTATIC_RESOLVED :
+	      opcode == opc_invokespecial ? H_INVOKESPECIAL_RESOLVED :
+	      cache->is_vfinal() ? H_INVOKEVFINAL : H_INVOKEVIRTUAL_RESOLVED]);
+	  Thumb2_invoke_restore(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
+ 	mov_imm(jinfo->codebuf, ARM_R1, 0);
+	if (opcode != opc_invokestatic)
+ 	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  load_istate(jinfo, ARM_R0, ISTATE_CONSTANTS, stackdepth);
+	ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
+	if (opcode != opc_invokestatic)
+	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4, 1, 0);
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0,
+		CP_OFFSET + (index << 4) + (opcode == opc_invokevirtual ? 8 : 4), 1, 0);
+	else
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R3, INSTANCEKLASS_VTABLE_OFFSET + cache->f2() * 4, 1, 0);
+	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
+ 	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
 	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_BCP, 1, 0);
-	str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  Thumb2_Debug(jinfo, H_DEBUG_METHODCALL);
-	Thumb2_invoke_save(jinfo, stackdepth);
-  sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
-	ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
-	mov_reg(jinfo->codebuf, ARM_R2, Rthread);
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK, 1, 0);
-add_imm(jinfo->codebuf, ARM_R3, ARM_R3, CODE_ALIGN_SIZE);
-//	enter_leave(jinfo->codebuf, 0);
-	blx_reg(jinfo->codebuf, ARM_R3);
-//	enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rthread, Ristate, ISTATE_THREAD, 1, 0);
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
-#endif
+ 	ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
+	store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
+ 	str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
+ 	Thumb2_invoke_save(jinfo, stackdepth);
+	sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
+ 	ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
+	store_istate(jinfo, Rstack, ISTATE_STACK, stackdepth+1);
+	add_imm(jinfo->codebuf, ARM_R3, ARM_R3, FAST_ENTRY_OFFSET);
+ 	blx_reg(jinfo->codebuf, ARM_R3);
+ 	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
+	stackdepth = STACKDEPTH(jinfo, bc_stackinfo[bci+len]);
 	ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_STACK_LIMIT, 1, 0);
-	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
-	Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	load_istate(jinfo, ARM_R2, ISTATE_STACK_LIMIT, stackdepth);
+ 	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+	Thumb2_invoke_restore(jinfo, stackdepth);
 	mov_imm(jinfo->codebuf, ARM_R0, 0);   // set last SP to zero
 					      // before setting FP
 	str_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
 	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+	Thumb2_invoke_restore(jinfo, stackdepth);
 	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
 	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
 	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP, 1, 0);
@@ -6624,162 +6215,14 @@
 	int index = GET_NATIVE_U2(code_base+bci+1);
 	unsigned loc, loc_inc_ex;
 
-// Currently we just call the unresolved invokeinterface entry for resolved /
-// unresolved alike!
-    Thumb2_Flush(jinfo);
-    Thumb2_invoke_save(jinfo, stackdepth);
-    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-    mov_imm(jinfo->codebuf, ARM_R1, index);
-    blx(jinfo->codebuf, handlers[H_INVOKEINTERFACE]);
-    Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	break;
-      }
-
-      case opc_invokevirtual: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	unsigned loc;
-
-        cache = cp->entry_at(index);
-        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
-	  Thumb2_Flush(jinfo);
-	  Thumb2_invoke_save(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf, handlers[H_INVOKEVIRTUAL]);
-	  Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	  break;
-	}
-
-	if (cache->is_vfinal()) {
-	  methodOop callee = (methodOop)cache->f2();
-
-	  if (handle_special_method(callee, jinfo, stackdepth))
-	    break;
-
-	  if (callee->is_accessor()) {
-	    u1 *code = callee->code_base();
-	    int index = GET_NATIVE_U2(&code[2]);
-	    constantPoolCacheOop callee_cache = callee->constants()->cache();
-	    ConstantPoolCacheEntry *entry = callee_cache->entry_at(index);
-	    Reg r_obj, r;
-
-	    if (entry->is_resolved(Bytecodes::_getfield)) {
-#if 0
-	      tty->print("Inlining accessor (opcode = invokevfinal) ");
-	      callee->print_short_name(tty);
-	      tty->print("\n");
-#endif
-	      JASSERT(cache->parameter_size() == 1, "not 1 parameter to accessor");
-
-	      TosState tos_type = entry->flag_state();
-	      int field_offset = entry->f2();
-
-	      JASSERT(tos_type == btos || tos_type == ctos || tos_type == stos || tos_type == atos || tos_type == itos, "not itos or atos");
-
-	      Thumb2_Fill(jinfo, 1);
-	      r_obj = POP(jstack);
-	      Thumb2_Spill(jinfo, 1, 0);
-	      r = JSTACK_REG(jstack);
-	      PUSH(jstack, r);
-	      if (tos_type == btos)
-		ldrsb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      else if (tos_type == ctos)
-		ldrh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      else if (tos_type == stos)
-		ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      else
-		ldr_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      break;
-	    }
-	  }
-	}
-
+	// Currently we just call the unresolved invokeinterface entry for resolved /
+	// unresolved alike!
 	Thumb2_Flush(jinfo);
-	if (cache->is_vfinal()) {
-  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_CONSTANTS, 1, 0);
-	  mov_imm(jinfo->codebuf, ARM_R1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, CP_OFFSET + (index << 4) + 8, 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 0, 1, 0); // Null pointer check - cbz better?
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
-	  str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  Thumb2_Debug(jinfo, H_DEBUG_METHODCALL);
 	Thumb2_invoke_save(jinfo, stackdepth);
-  sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_BCP, 1, 0);
-	  mov_reg(jinfo->codebuf, ARM_R2, Rthread);
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK, 1, 0);
-add_imm(jinfo->codebuf, ARM_R3, ARM_R3, CODE_ALIGN_SIZE);
-//	  enter_leave(jinfo->codebuf, 0);
-	  blx_reg(jinfo->codebuf, ARM_R3);
-//	  enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rthread, Ristate, ISTATE_THREAD, 1, 0);
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
-#endif
-	  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_STACK_LIMIT, 1, 0);
-	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
-	Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP, 1, 0);
-	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R3, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
-	  break;
-	} else {
-  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4, 1, 0);
-	  mov_imm(jinfo->codebuf, ARM_R1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R3, INSTANCEKLASS_VTABLE_OFFSET + cache->f2() * 4, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_BCP, 1, 0);
-	  str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  Thumb2_Debug(jinfo, H_DEBUG_METHODCALL);
-	Thumb2_invoke_save(jinfo, stackdepth);
-  sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
-	  mov_reg(jinfo->codebuf, ARM_R2, Rthread);
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK, 1, 0);
-add_imm(jinfo->codebuf, ARM_R3, ARM_R3, CODE_ALIGN_SIZE);
-//	  enter_leave(jinfo->codebuf, 0);
-	  blx_reg(jinfo->codebuf, ARM_R3);
-//	  enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rthread, Ristate, ISTATE_THREAD, 1, 0);
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
-#endif
-	  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_STACK_LIMIT, 1, 0);
-	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
-	Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R3, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
-	}
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	mov_imm(jinfo->codebuf, ARM_R1, index);
+	blx(jinfo->codebuf, handlers[H_INVOKEINTERFACE]);
+	Thumb2_invoke_restore(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	break;
       }
 
@@ -6810,7 +6253,18 @@
 
       case opc_jsr_w:
       case opc_jsr: {
-	Thumb2_Jsr(jinfo , bci, stackdepth);
+	int offset = opcode == opc_jsr ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
+	Reg r;
+
+	Thumb2_Spill(jinfo, 1, 0);
+	r = JSTACK_REG(jstack);
+	PUSH(jstack, r);
+	mov_imm(jinfo->codebuf, r, bci + ((opcode == opc_jsr) ? 3 : 5));
+	Thumb2_Flush(jinfo);
+	bci = Thumb2_Goto(jinfo, bci, offset, len);
+	len = 0;
 	break;
       }
 
@@ -6819,25 +6273,20 @@
 	break;
       }
 
-      case opc_athrow:
-	Thumb2_Exit(jinfo, H_ATHROW, bci, stackdepth);
-	break;
-
-      case opc_goto: {
-	int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
+      case opc_goto:
+      case opc_goto_w: {
+	int offset = opcode == opc_goto ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
 	Thumb2_Flush(jinfo);
 	bci = Thumb2_Goto(jinfo, bci, offset, len, stackdepth);
 	len = 0;
 	break;
       }
 
-      case opc_goto_w: {
-	int offset = GET_JAVA_U4(jinfo->code_base + bci + 1);
-	Thumb2_Flush(jinfo);
-	bci = Thumb2_Goto(jinfo, bci, offset, len, stackdepth);
-	len = 0;
+      case opc_athrow:
+	Thumb2_Exit(jinfo, H_ATHROW, bci, stackdepth);
 	break;
-      }
 
       case opc_ifeq:
       case opc_ifne:
@@ -6889,7 +6338,6 @@
       case opc_freturn:
       case opc_areturn:
 	Thumb2_Return(jinfo, opcode, bci, stackdepth);
-	if (!jinfo->compiled_return) jinfo->compiled_return = bci;
 	break;
 
       case opc_return_register_finalizer: {
@@ -6907,12 +6355,12 @@
 	loc_eq = forward_16(jinfo->codebuf);
 	Thumb2_save_locals(jinfo, stackdepth);
 	mov_reg(jinfo->codebuf, ARM_R1, r);
-	ldr_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+	load_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
 	ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
 	add_imm(jinfo->codebuf, ARM_R0, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	str_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
+	store_istate(jinfo, ARM_R0, ISTATE_BCP, stackdepth);
 	sub_imm(jinfo->codebuf, ARM_R0, Rstack, 4);
-	str_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
+	store_istate(jinfo, ARM_R0, ISTATE_STACK, stackdepth);
 
 	mov_reg(jinfo->codebuf, ARM_R0, Rthread);
 	mov_imm(jinfo->codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc);
@@ -7005,6 +6453,31 @@
 	break;
       }
 
+      case opc_monitorenter:
+	Thumb2_Flush(jinfo);
+	Thumb2_invoke_save(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	bl(jinfo->codebuf, handlers[H_MONITORENTER]);
+	Thumb2_invoke_restore(jinfo, stackdepth);
+	break;
+
+      case opc_monitorexit: {	// call the H_MONITOREXIT stub, then dispatch any exception it reports
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);	// value on top of the Java stack (handed to the stub as tos)
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R1, r);	// H_MONITOREXIT stub contract: r1 = tos
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);	// r3 = bci; the stub adds it to the constMethod pointer to form the bcp
+        Thumb2_save_locals(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_MONITOREXIT]);
+        Thumb2_restore_locals(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);	// stub result in R0: != 0 => exception
+	it(jinfo->codebuf, COND_NE, IT_MASK_T);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
+	break;
+      }
+
       case opc_newarray: {
 	Reg r;
 	unsigned loc;
@@ -7103,6 +6576,8 @@
 	Thumb2_Fill(jinfo, 1);
 	r = POP(jstack);
 
+	Thumb2_Flush(jinfo);
+
 	table_loc = out_loc(jinfo->codebuf);
 	for (i = 0, tablep = table; i < npairs; i++) {
 	  unsigned match;
@@ -7169,6 +6644,7 @@
 
 	Thumb2_Fill(jinfo, 1);
 	rs = POP(jstack);
+	Thumb2_Flush(jinfo);
 	r = Thumb2_Tmp(jinfo, (1<<rs));
 	sub_imm(jinfo->codebuf, r, rs, low);
 	cmp_imm(jinfo->codebuf, r, (high-low)+1);
@@ -7245,7 +6721,6 @@
 	    int nlocals = jinfo->method->max_locals();
 	    r = ARM_IP;
 	    stackdepth -= jstack->depth;
-	    if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
 	    load_local(jinfo, r, local, stackdepth);
 	    add_imm(jinfo->codebuf, r, r, constant);
 	    store_local(jinfo, r, local, stackdepth);
@@ -7276,7 +6751,7 @@
 	break;
     }
     bci += len;
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
     if (len == 0) {
       if (start_idx == jinfo->codebuf->idx) start_bci[start_idx] = -1;
     } else
@@ -7433,45 +6908,6 @@
   return 0;
 }
 
-static int DebugSwitch = 1;
-
-extern "C" void Debug_Ignore_Safepoints(void)
-{
-	printf("Ignore Safepoints\n");
-}
-
-extern "C" void Debug_Notice_Safepoints(void)
-{
-	printf("Notice Safepoints\n");
-}
-
-extern "C" void Debug_ExceptionReturn(interpreterState istate, intptr_t *stack)
-{
-  JavaThread *thread = istate->thread();
-
-  if (thread->has_pending_exception()) {
-    Handle ex(thread, thread->pending_exception());
-    tty->print_cr("Exception %s", Klass::cast(ex->klass())->external_name());
-  }
-}
-
-extern "C" void Debug_Stack(intptr_t *stack)
-{
-  int i;
-  char msg[16];
-
-  tty->print("  Stack:");
-  for (i = 0; i < 6; i++) {
-    tty->print(" [");
-    sprintf(msg, "%d", i);
-    tty->print(msg);
-    tty->print("] = ");
-    sprintf(msg, "%08x", (int)stack[i]);
-    tty->print(msg);
-  }
-  tty->cr();
-}
-
 extern "C" void Debug_MethodEntry(interpreterState istate, intptr_t *stack, methodOop callee)
 {
 #if 0
@@ -7490,6 +6926,7 @@
 
 extern "C" void Debug_MethodExit(interpreterState istate, intptr_t *stack)
 {
+#if 0
   if (DebugSwitch) {
     methodOop method = istate->method();
     JavaThread *thread = istate->thread();
@@ -7503,6 +6940,7 @@
     tty->flush();
     if (exc) tty->print_cr("Exception %s", exc->print_value_string());
   }
+#endif
 }
 
 extern "C" void Debug_MethodCall(interpreterState istate, intptr_t *stack, methodOop callee)
@@ -7520,26 +6958,17 @@
   }
 #endif
 }
-
-extern "C" int Debug_irem_Handler(int a, int b)
-{
-	printf("%d %% %d\n", a, b);
-	return a%b;
-}
-
 extern "C" void Thumb2_Install(methodOop mh, u32 entry);
 
-#define IS_COMPILED(e, cb) ((e) >= (unsigned)(cb) && (e) < (unsigned)(cb) + (cb)->size)
-
 extern "C" unsigned cmpxchg_ptr(unsigned new_value, volatile unsigned *ptr, unsigned cmp_value);
 static volatile unsigned compiling;
 static unsigned CompileCount = 0;
-static unsigned MaxCompile = 130;
+static unsigned MaxCompile = 10000;
 
 #define COMPILE_ONLY	0
 #define COMPILE_COUNT	0
 #define DISASS_AFTER	0
-//#define COMPILE_LIST	0
+//#define COMPILE_LIST
 
 #ifdef COMPILE_LIST
 static const char *compile_list[] = {
@@ -7549,10 +6978,9 @@
 
 static unsigned compiled_methods = 0;
 
-#ifdef T2EE_PRINT_STATISTICS
+#ifdef T2_PRINT_STATISTICS
 static unsigned bytecodes_compiled = 0;
 static unsigned arm_code_generated = 0;
-static unsigned total_zombie_bytes = 0;
 static clock_t total_compile_time = 0;
 #endif
 
@@ -7585,7 +7013,7 @@
   Thumb2_Entrypoint thumb_entry;
   int compiled_accessor;
 
-  if (!(CPUInfo & ARCH_THUMBEE))
+  if (!(CPUInfo & ARCH_THUMB2))
 	UseCompiler = false;
 
   if (!UseCompiler || method->is_not_compilable()) {
@@ -7600,7 +7028,7 @@
     compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
     if (compiled_offset == 0) return 0;
     thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
-    thumb_entry.osr_entry = (unsigned)cmethod->osr_entry | TBIT;
+    thumb_entry.regusage = cmethod->regusage;
     return *(unsigned long long *)&thumb_entry;
   }
 
@@ -7615,8 +7043,7 @@
   // Othersize we have difficulty access the locals from the stack pointer
   //
   if (code_size > THUMB2_MAX_BYTECODE_SIZE ||
-		(method->max_locals() + method->max_stack()) >= 1000 ||
-		method->has_monitor_bytecodes()) {
+		(method->max_locals() + method->max_stack()) >= 1000) {
         method->set_not_compilable();
 	return 0;
   }
@@ -7635,7 +7062,10 @@
 		if (strcmp(s, method->name_and_sig_as_C_string()) == 0)
 			break;
 	}
-	if (!s) return 0;
+	if (!s) {
+		method->set_not_compilable();
+		return 0;
+	}
   }
 #endif
 
@@ -7652,12 +7082,12 @@
 
   if (cmpxchg_ptr(1, &compiling, 0)) return 0;
 
-#ifdef T2EE_PRINT_STATISTICS
+#ifdef T2_PRINT_STATISTICS
   clock_t compile_time = clock();
 #endif
 
-#ifdef T2EE_PRINT_COMPILATION
-  if (t2ee_print_compilation || PrintAssembly) {
+#ifdef T2_PRINT_COMPILATION
+  if (PrintCompilation || PrintAssembly) {
     fprintf(stderr, "Compiling %d %c%c %s\n",
 	compiled_methods,
 	method->is_synchronized() ? 'S' : ' ',
@@ -7668,7 +7098,7 @@
 
   memset(bc_stackinfo, 0, code_size * sizeof(unsigned));
   memset(locals_info, 0, method->max_locals() * sizeof(unsigned));
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
   memset(start_bci, 0xff, sizeof(start_bci));
   memset(end_bci, 0xff, sizeof(end_bci));
 #endif
@@ -7684,13 +7114,13 @@
   jinfo_str.bc_stackinfo = bc_stackinfo;
   jinfo_str.locals_info = locals_info;
   jinfo_str.compiled_return = 0;
-  jinfo_str.zombie_bytes = 0;
+  for (int i = 0; i < 12; i++) jinfo_str.compiled_word_return[i] = 0;
   jinfo_str.is_leaf = 1;
+  jinfo_str.use_istate = method->has_monitor_bytecodes();
 
   Thumb2_local_info_from_sig(&jinfo_str, method, base);
 
-  Thumb2_pass1(&jinfo_str, 0);
-  Thumb2_pass2(&jinfo_str, 0, 0);
+  Thumb2_pass1(&jinfo_str, 0, 0);
 
   codebuf_str.codebuf = (unsigned short *)cb->hp;
   codebuf_str.idx = 0;
@@ -7711,16 +7141,14 @@
   jregs_str.pregs[1] = JAZ_V2;
   jregs_str.pregs[2] = JAZ_V3;
   jregs_str.pregs[3] = JAZ_V4;
-
-#ifndef USE_RLOCAL
   jregs_str.pregs[4] = JAZ_V5;
-#endif
+  jregs_str.pregs[5] = JAZ_V6;
 
   jregs_str.npregs = PREGS;
 
   Thumb2_RegAlloc(&jinfo_str);
 
-  slow_entry = out_align(&codebuf_str, CODE_ALIGN);
+  slow_entry = out_align_offset(&codebuf_str, CODE_ALIGN, SLOW_ENTRY_OFFSET);
   cmethod = (Compiled_Method *)slow_entry;
   slow_entry |= TBIT;
 
@@ -7736,10 +7164,10 @@
     compiled_accessor = 0;
   }
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
   if (DISASS_AFTER == 0 || compiled_methods >= DISASS_AFTER)
     if (PrintAssembly)
-	Thumb2_disass(&jinfo_str);
+      Thumb2_disass(&jinfo_str);
 #endif
 
   for (int i = 0; i < PREGS; i++)
@@ -7747,15 +7175,14 @@
 
   Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf_str.idx * 2);
 
-#ifdef T2EE_PRINT_STATISTICS
+#ifdef T2_PRINT_STATISTICS
   compile_time = clock() - compile_time;
   total_compile_time += compile_time;
 
-  if (t2ee_print_statistics) {
+  if (t2_print_statistics) {
     unsigned codegen = codebuf_str.idx * 2;
     bytecodes_compiled += code_size;
     arm_code_generated += codegen;
-    total_zombie_bytes += jinfo_str.zombie_bytes;
     fprintf(stderr, "%d bytecodes => %d bytes code in %.2f sec, totals: %d => %d in %.2f sec\n",
       code_size, codegen, (double)compile_time/(double)CLOCKS_PER_SEC,
     bytecodes_compiled, arm_code_generated, (double)total_compile_time/(double)CLOCKS_PER_SEC);
@@ -7783,7 +7210,8 @@
   compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
   if (compiled_offset == 0) return 0;
   thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
-  thumb_entry.osr_entry = (unsigned)cmethod->osr_entry | TBIT;
+  thumb_entry.regusage = cmethod->regusage;
+
 #ifdef THUMB2_JVMTI
   {
     // we need to dispatch a compiled_method_load event
@@ -7814,6 +7242,7 @@
 		address_bci_map, NULL);
   }
 #endif // THUMB2_JVMTI
+
   return *(unsigned long long *)&thumb_entry;
 }
 
@@ -7823,6 +7252,7 @@
 extern "C" void Thumb2_Handle_Exception_NoRegs(void);
 extern "C" void Thumb2_Exit_To_Interpreter(void);
 extern "C" void Thumb2_Stack_Overflow(void);
+extern "C" void Thumb2_monitorenter(void);
 
 extern "C" void __divsi3(void);
 extern "C" void __aeabi_ldivmod(void);
@@ -7835,6 +7265,7 @@
 extern "C" void Helper_new(void);
 extern "C" void Helper_instanceof(void);
 extern "C" void Helper_checkcast(void);
+extern "C" void Helper_monitorexit(void);
 extern "C" void Helper_aastore(void);
 extern "C" void Helper_aputfield(void);
 extern "C" void Helper_synchronized_enter(void);
@@ -7879,6 +7310,11 @@
 extern char Thumb2_putstatic_a_stub[];
 extern char Thumb2_putstatic_dw_stub[];
 
+extern char Thumb2_invokestaticresolved_stub[];
+extern char Thumb2_invokespecialresolved_stub[];
+extern char Thumb2_invokevirtualresolved_stub[];
+extern char Thumb2_invokevfinalresolved_stub[];
+
 #define STUBS_SIZE	(Thumb2_stubs_end-Thumb2_stubs)
 #define IDIV_STUB		(Thumb2_idiv_stub-Thumb2_stubs)
 #define IREM_STUB		(Thumb2_irem_stub-Thumb2_stubs)
@@ -7907,13 +7343,18 @@
 #define PUTSTATIC_A_STUB	(Thumb2_putstatic_a_stub-Thumb2_stubs)
 #define PUTSTATIC_DW_STUB	(Thumb2_putstatic_dw_stub-Thumb2_stubs)
 
+#define INVOKESTATIC_RESOLVED_STUB (Thumb2_invokestaticresolved_stub-Thumb2_stubs)
+#define INVOKESPECIAL_RESOLVED_STUB (Thumb2_invokespecialresolved_stub-Thumb2_stubs)
+#define INVOKEVIRTUAL_RESOLVED_STUB (Thumb2_invokevirtualresolved_stub-Thumb2_stubs)
+#define INVOKEVFINAL_RESOLVED_STUB (Thumb2_invokevfinalresolved_stub-Thumb2_stubs)
+
 extern "C" void Thumb2_NullPtr_Handler(void);
 
 
 extern "C" int Thumb2_Check_Null(unsigned *regs, unsigned pc)
 {
   Thumb2_CodeBuf *cb = thumb2_codebuf;
-  if (!(CPUInfo & ARCH_THUMBEE)) return 0;
+  if (!(CPUInfo & ARCH_THUMB2)) return 0;
   if (IS_COMPILED(pc, cb)) {
     regs[ARM_LR] = pc;
     regs[ARM_PC] = (unsigned)Thumb2_NullPtr_Handler;
@@ -7931,22 +7372,22 @@
   u32 loc_irem, loc_idiv, loc_ldiv;
   int rc;
 
-  if (!(CPUInfo & ARCH_THUMBEE)) {
+  if (!(CPUInfo & ARCH_THUMB2)) {
     UseCompiler = false;
     return;
   }
 
-#ifdef T2EE_PRINT_COMPILATION
-  t2ee_print_compilation = getenv("T2EE_PRINT_COMPILATION");
+#ifdef T2_PRINT_COMPILATION
+  PrintCompilation |= getenv("T2_PRINT_COMPILATION") != NULL;
 #endif
-#ifdef T2EE_PRINT_STATISTICS
-  t2ee_print_statistics = getenv("T2EE_PRINT_STATISTICS");
+#ifdef T2_PRINT_STATISTICS
+  t2_print_statistics = getenv("T2_PRINT_STATISTICS");
 #endif
-#ifdef T2EE_PRINT_DISASS
-  PrintAssembly |= getenv("T2EE_PRINT_DISASS") != NULL;
+#ifdef T2_PRINT_DISASS
+  PrintAssembly |= getenv("T2_PRINT_DISASS") != NULL;
 #endif
-#ifdef T2EE_PRINT_REGUSAGE
-  t2ee_print_regusage = getenv("T2EE_PRINT_REGUSAGE");
+#ifdef T2_PRINT_REGUSAGE
+  t2_print_regusage = getenv("T2_PRINT_REGUSAGE");
 #endif
 
   cb = (Thumb2_CodeBuf *)mmap(0, THUMB2_CODEBUF_SIZE, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
@@ -7968,7 +7409,6 @@
     return;
   }
 
-#if 1
 #ifdef THUMB2_JVMTI
   // cache the start of the generated stub region for notification later
   stub_gen_code_start = cb->hp;
@@ -7981,7 +7421,7 @@
 
   handlers[H_IDIV] = (unsigned)(cb->hp + IDIV_STUB);
   handlers[H_IREM] = (unsigned)(cb->hp + IREM_STUB);
-  handlers[H_INVOKEINTERFACE] = (unsigned)(cb->hp + INVOKEINTERFACE_STUB);
+handlers[H_INVOKEINTERFACE] = (unsigned)(cb->hp + INVOKEINTERFACE_STUB);
   handlers[H_INVOKEVIRTUAL] = (unsigned)(cb->hp + INVOKEVIRTUAL_STUB);
   handlers[H_INVOKESTATIC] = (unsigned)(cb->hp + INVOKESTATIC_STUB);
   handlers[H_INVOKESPECIAL] = (unsigned)(cb->hp + INVOKESPECIAL_STUB);
@@ -7992,6 +7432,11 @@
   handlers[H_GETFIELD_SB] = (unsigned)(cb->hp + GETFIELD_SB_STUB);
   handlers[H_GETFIELD_DW] = (unsigned)(cb->hp + GETFIELD_DW_STUB);
 
+  handlers[H_INVOKESTATIC_RESOLVED] = (unsigned)(cb->hp + INVOKESTATIC_RESOLVED_STUB);	// resolved-invoke stubs: one handler slot per stub
+  handlers[H_INVOKESPECIAL_RESOLVED] = (unsigned)(cb->hp + INVOKESPECIAL_RESOLVED_STUB);
+  handlers[H_INVOKEVIRTUAL_RESOLVED] = (unsigned)(cb->hp + INVOKEVIRTUAL_RESOLVED_STUB);
+  handlers[H_INVOKEVFINAL] = (unsigned)(cb->hp + INVOKEVFINAL_RESOLVED_STUB);
+
   handlers[H_PUTFIELD_WORD] = (unsigned)(cb->hp + PUTFIELD_WORD_STUB);
   handlers[H_PUTFIELD_H] = (unsigned)(cb->hp + PUTFIELD_H_STUB);
   handlers[H_PUTFIELD_B] = (unsigned)(cb->hp + PUTFIELD_B_STUB);
@@ -8011,7 +7456,19 @@
   handlers[H_PUTSTATIC_DW] = (unsigned)(cb->hp + PUTSTATIC_DW_STUB);
 
   codebuf.idx += (Thumb2_stubs_end-Thumb2_stubs) >> 1;
-#endif
+
+  // Disassemble the codebuf we just created.  For debugging.  This
+  // first part is all ARM code; the part that we're about to create
+  // is Thumb code.
+  if (PrintAssembly) {
+    Hsdis hsdis;
+    hsdis.decode_instructions(cb->hp, cb->hp + codebuf.idx * 2,
+			      print_address, NULL, NULL, stderr,
+			      "");
+    fputc('\n', stderr);
+  }
+
+  char *begin_thumb_code = cb->hp + codebuf.idx * 2;
 
   handlers[H_LDIV] = handlers[H_LREM] = out_pos(&codebuf);
   dop_reg(&codebuf, DP_ORR, ARM_IP, ARM_R2, ARM_R3, 0, 0);
@@ -8121,7 +7578,8 @@
 //   r3 = bci
 //   result -> R0, == 0 => exception
   handlers[H_NEW] = out_pos(&codebuf);
-  mov_reg(&codebuf, ARM_R0, Ristate);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
   ldr_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_METHOD, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_new);
   ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
@@ -8137,14 +7595,15 @@
 //   r3 = bci
 //   result -> thread->vm_result
   handlers[H_NEWARRAY] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, ARM_R3, ARM_R0, ARM_R3);
   mov_reg(&codebuf, ARM_R0, Rthread);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_BCP, 1, 0);
-sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // ANEWARRAY Stub
@@ -8153,14 +7612,18 @@
 //   r3 = tos
 //   result -> thread->vm_result
   handlers[H_ANEWARRAY] = out_pos(&codebuf);
-sub_imm(&codebuf, ARM_R1, Rstack, 4);
-  str_imm(&codebuf, ARM_R1, Ristate, ISTATE_STACK, 1, 0);
-  ldr_imm(&codebuf, ARM_R1, Ristate, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+
+  sub_imm(&codebuf, ARM_R1, Rstack, 4);
+  str_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTANTS, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_IP, ARM_R0);
   mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
   mov_reg(&codebuf, ARM_R0, Rthread);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
@@ -8168,14 +7631,15 @@
 //   r0 = bci
 //   r1 = dimensions (*4)
   handlers[H_MULTIANEWARRAY] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   sub_imm(&codebuf, ARM_R3, Rstack, 4);
   ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
   add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
   add_reg(&codebuf, Rstack, Rstack, ARM_R1);
   mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
   mov_reg(&codebuf, ARM_R0, Rthread);
   sub_imm(&codebuf, ARM_R1, Rstack, 4);
   mov_reg(&codebuf, ARM_PC, ARM_R3);
@@ -8183,45 +7647,91 @@
 // LDC Stub
 //   r0 = bci
   handlers[H_LDC] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   sub_imm(&codebuf, ARM_R3, Rstack, 4);
   ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
   add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
   mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
   mov_reg(&codebuf, ARM_R0, Rthread);
-//  mov_imm(&codebuf, ARM_R1, 0);
+  mov_imm(&codebuf, ARM_R1, 0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// LDC_W Stub
+//   r0 = bci
+  handlers[H_LDC_W] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_imm(&codebuf, ARM_R1, 1);
   mov_reg(&codebuf, ARM_PC, ARM_R3);
 
 // INSTANCEOF Stub
 //   r1 = index
+//   r2 = tos
 //   r3 = bci
 //   result -> R0, == -1 => exception
   handlers[H_INSTANCEOF] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_instanceof);
-  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R3);
-sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
-  mov_reg(&codebuf, ARM_R0, Ristate);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // CHECKCAST Stub
 //   r1 = index
+//   r2 = tos
 //   r3 = bci
 //   result -> R0, != 0 => exception
   handlers[H_CHECKCAST] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_checkcast);
-  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R3);
-sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
-  mov_reg(&codebuf, ARM_R0, Ristate);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// MONITORENTER
+//   r0 = bci
+  handlers[H_MONITORENTER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_monitorenter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// MONITOREXIT Stub
+//   r1 = tos
+//   r3 = bci
+//   result -> R0, != 0 => exception
+  handlers[H_MONITOREXIT] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_monitorexit);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // AASTORE Stub
@@ -8230,14 +7740,16 @@
 //   r2 = index
 //   r3 = arrayref
   handlers[H_AASTORE] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_IP, Ristate, ISTATE_BCP, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_IP, Rstack, 4);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_STACK, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_aastore);
-  mov_reg(&codebuf, ARM_R0, Ristate);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // APUTFIELD Stub
@@ -8248,16 +7760,20 @@
 
 // SYNCHRONIZED_ENTER Stub
 //   r0 = bci
-//   r1 = monitor
+//   Rstack = monitor
   handlers[H_SYNCHRONIZED_ENTER] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Ristate, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_IP, Ristate, ISTATE_BCP, 1, 0);
+  ldr_imm(&codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R0);
+  str_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R1, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+
   mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_enter);
   mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_R1, Rstack);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 //
@@ -8265,12 +7781,14 @@
 //   r0 = bci
 //   r1 = monitor
   handlers[H_SYNCHRONIZED_EXIT] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+
+  ldr_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_IP, Ristate, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+  str_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_exit);
   mov_reg(&codebuf, ARM_R0, Rthread);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
@@ -8309,10 +7827,11 @@
 // EXIT_TO_INTERPRETER
 //   r0 = bci
   handlers[H_EXIT_TO_INTERPRETER] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R1, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Exit_To_Interpreter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Exit_To_Interpreter);
   mov_reg(&codebuf, ARM_PC, ARM_R3);
 
 // H_SAFEPOINT
@@ -8321,10 +7840,11 @@
 
   // Set up BytecodeInterpreter->_bcp for the GC
   // bci+CONSTMETHOD_CODEOFFSET is passed in ARM_R1
-  ldr_imm(&codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+  // istate is passed in ARM_R2
+  ldr_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_METHOD, 1, 0);
   ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_BCP, 1, 0);
 
   mov_imm(&codebuf, ARM_IP, (u32)Helper_SafePoint);
   mov_reg(&codebuf, ARM_R0, Rthread);
@@ -8343,10 +7863,10 @@
   // Disassemble the codebuf we just created.  For debugging
   if (PrintAssembly) {
     Hsdis hsdis;
-    hsdis.decode_instructions(cb->hp, cb->hp + codebuf.idx * 2,
-			      print_address, NULL, NULL, stdout,
+    hsdis.decode_instructions(begin_thumb_code, cb->hp + codebuf.idx * 2,
+			      print_address, NULL, NULL, stderr,
 			      "force-thumb");
-    putchar('\n');
+    fputc('\n', stderr);
   }
 
   Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf.idx * 2);
@@ -8360,6 +7880,6 @@
 #endif // THUMB2_JVMTI
 }
 
-#endif // THUMB2EE
+#endif // T2JIT
 
 #endif // __arm__