changeset 7011:c84c5aae6873

PR2228 - Add ARM32 JIT
author Edward Nevill <ed@camswl.com>
date Mon, 16 Feb 2015 13:18:10 +0000
parents e0d71a0dd6c6
children 6d54b1140edf
files make/linux/makefiles/zeroshark.make src/cpu/zero/vm/arm32JIT.cpp src/cpu/zero/vm/thumb2.cpp
diffstat 3 files changed, 8585 insertions(+), 7987 deletions(-) [+]
line wrap: on
line diff
--- a/make/linux/makefiles/zeroshark.make	Wed Apr 16 16:09:05 2014 +0100
+++ b/make/linux/makefiles/zeroshark.make	Mon Feb 16 13:18:10 2015 +0000
@@ -33,12 +33,12 @@
 ifeq ($(ARM32JIT),true)
 Obj_Files += asm_helper.o
 Obj_Files += cppInterpreter_arm.o
-Obj_Files += thumb2.o
+Obj_Files += arm32JIT.o
 
 CFLAGS += -DHOTSPOT_ASM
 
 cppInterpreter_arm.o:	offsets_arm.s bytecodes_arm.s
-thumb2.o:		offsets_arm.s
+arm32JIT.o:		offsets_arm.s
 
 offsets_arm.s:	mkoffsets
 	@echo Generating assembler offsets
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/zero/vm/arm32JIT.cpp	Mon Feb 16 13:18:10 2015 +0000
@@ -0,0 +1,8583 @@
+/*
+ * Copyright 2009, 2010 Edward Nevill
+ * Copyright 2012, 2013 Red Hat
+ * Copyright 2014, 2015 Linaro Ltd
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#if defined(__arm__)
+
+#undef T2JIT
+#if !defined(DISABLE_THUMB2) && defined(HOTSPOT_ASM) && !defined(SHARK)
+#define T2JIT
+#endif
+
+unsigned int Thumb2 = 1;
+
+#ifdef T2JIT
+
+// setting DISABLE_THUMB2_JVMTI at build time disables notification
+// of JVMTI dynamic_generate and compiled_method_load events
+#undef THUMB2_JVMTI
+#if !defined(DISABLE_THUMB2_JVMTI)
+#define THUMB2_JVMTI
+#endif
+
+#define T2_PRINT_COMPILATION
+#define T2_PRINT_STATISTICS
+#define T2_PRINT_DISASS
+#define T2_PRINT_REGUSAGE
+
+#define T2EE_PRINT_REGUSAGE
+#define CODE_ALIGN 64
+
+#define SLOW_ENTRY_OFFSET 24
+#define FAST_ENTRY_OFFSET 40
+
+#ifdef T2_PRINT_STATISTICS
+static char *t2_print_statistics;
+#endif
+
+#ifdef T2_PRINT_REGUSAGE
+static char *t2_print_regusage;
+#endif
+
+static char *t2_ospace;
+#define OSPACE t2_ospace
+
+#ifdef PRODUCT
+#define THUMB2_CODEBUF_SIZE (8 * 1024 * 1024)
+#else
+#define THUMB2_CODEBUF_SIZE (4 * 1024 * 1024)
+#endif
+#define THUMB2_MAX_BYTECODE_SIZE 10000
+#define THUMB2_MAX_T2CODE_SIZE 65000
+#define THUMB2_MAXLOCALS 1000
+
+#include <sys/mman.h>
+#include <ucontext.h>
+#include "precompiled.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "compiler/compilerOracle.hpp"
+
+#define opc_nop			0x00
+#define opc_aconst_null		0x01
+#define opc_iconst_m1		0x02
+#define opc_iconst_0		0x03
+#define opc_iconst_1		0x04
+#define opc_iconst_2		0x05
+#define opc_iconst_3		0x06
+#define opc_iconst_4		0x07
+#define opc_iconst_5		0x08
+#define opc_lconst_0		0x09
+#define opc_lconst_1		0x0a
+#define opc_fconst_0		0x0b
+#define opc_fconst_1		0x0c
+#define opc_fconst_2		0x0d
+#define opc_dconst_0		0x0e
+#define opc_dconst_1		0x0f
+#define opc_bipush		0x10
+#define opc_sipush		0x11
+#define opc_ldc			0x12
+#define opc_ldc_w		0x13
+#define opc_ldc2_w		0x14
+#define opc_iload		0x15
+#define opc_lload		0x16
+#define opc_fload		0x17
+#define opc_dload		0x18
+#define opc_aload		0x19
+#define opc_iload_0		0x1a
+#define opc_iload_1		0x1b
+#define opc_iload_2		0x1c
+#define opc_iload_3		0x1d
+#define opc_lload_0		0x1e
+#define opc_lload_1		0x1f
+#define opc_lload_2		0x20
+#define opc_lload_3		0x21
+#define opc_fload_0		0x22
+#define opc_fload_1		0x23
+#define opc_fload_2		0x24
+#define opc_fload_3		0x25
+#define opc_dload_0		0x26
+#define opc_dload_1		0x27
+#define opc_dload_2		0x28
+#define opc_dload_3		0x29
+#define opc_aload_0		0x2a
+#define opc_aload_1		0x2b
+#define opc_aload_2		0x2c
+#define opc_aload_3		0x2d
+#define opc_iaload		0x2e
+#define opc_laload		0x2f
+#define opc_faload		0x30
+#define opc_daload		0x31
+#define opc_aaload		0x32
+#define opc_baload		0x33
+#define opc_caload		0x34
+#define opc_saload		0x35
+#define opc_istore		0x36
+#define opc_lstore		0x37
+#define opc_fstore		0x38
+#define opc_dstore		0x39
+#define opc_astore		0x3a
+#define opc_istore_0		0x3b
+#define opc_istore_1		0x3c
+#define opc_istore_2		0x3d
+#define opc_istore_3		0x3e
+#define opc_lstore_0		0x3f
+#define opc_lstore_1		0x40
+#define opc_lstore_2		0x41
+#define opc_lstore_3		0x42
+#define opc_fstore_0		0x43
+#define opc_fstore_1		0x44
+#define opc_fstore_2		0x45
+#define opc_fstore_3		0x46
+#define opc_dstore_0		0x47
+#define opc_dstore_1		0x48
+#define opc_dstore_2		0x49
+#define opc_dstore_3		0x4a
+#define opc_astore_0		0x4b
+#define opc_astore_1		0x4c
+#define opc_astore_2		0x4d
+#define opc_astore_3		0x4e
+#define opc_iastore		0x4f
+#define opc_lastore		0x50
+#define opc_fastore		0x51
+#define opc_dastore		0x52
+#define opc_aastore		0x53
+#define opc_bastore		0x54
+#define opc_castore		0x55
+#define opc_sastore		0x56
+#define opc_pop			0x57
+#define opc_pop2		0x58
+#define opc_dup			0x59
+#define opc_dup_x1		0x5a
+#define opc_dup_x2		0x5b
+#define opc_dup2		0x5c
+#define opc_dup2_x1		0x5d
+#define opc_dup2_x2		0x5e
+#define opc_swap		0x5f
+#define opc_iadd		0x60
+#define opc_ladd		0x61
+#define opc_fadd		0x62
+#define opc_dadd		0x63
+#define opc_isub		0x64
+#define opc_lsub		0x65
+#define opc_fsub		0x66
+#define opc_dsub		0x67
+#define opc_imul		0x68
+#define opc_lmul		0x69
+#define opc_fmul		0x6a
+#define opc_dmul		0x6b
+#define opc_idiv		0x6c
+#define opc_ldiv		0x6d
+#define opc_fdiv		0x6e
+#define opc_ddiv		0x6f
+#define opc_irem		0x70
+#define opc_lrem		0x71
+#define opc_frem		0x72
+#define opc_drem		0x73
+#define opc_ineg		0x74
+#define opc_lneg		0x75
+#define opc_fneg		0x76
+#define opc_dneg		0x77
+#define opc_ishl		0x78
+#define opc_lshl		0x79
+#define opc_ishr		0x7a
+#define opc_lshr		0x7b
+#define opc_iushr		0x7c
+#define opc_lushr		0x7d
+#define opc_iand		0x7e
+#define opc_land		0x7f
+#define opc_ior			0x80
+#define opc_lor			0x81
+#define opc_ixor		0x82
+#define opc_lxor		0x83
+#define opc_iinc		0x84
+#define opc_i2l			0x85
+#define opc_i2f			0x86
+#define opc_i2d			0x87
+#define opc_l2i			0x88
+#define opc_l2f			0x89
+#define opc_l2d			0x8a
+#define opc_f2i			0x8b
+#define opc_f2l			0x8c
+#define opc_f2d			0x8d
+#define opc_d2i			0x8e
+#define opc_d2l			0x8f
+#define opc_d2f			0x90
+#define opc_i2b			0x91
+#define opc_i2c			0x92
+#define opc_i2s			0x93
+#define opc_lcmp		0x94
+#define opc_fcmpl		0x95
+#define opc_fcmpg		0x96
+#define opc_dcmpl		0x97
+#define opc_dcmpg		0x98
+#define opc_ifeq		0x99
+#define opc_ifne		0x9a
+#define opc_iflt		0x9b
+#define opc_ifge		0x9c
+#define opc_ifgt		0x9d
+#define opc_ifle		0x9e
+#define opc_if_icmpeq		0x9f
+#define opc_if_icmpne		0xa0
+#define opc_if_icmplt		0xa1
+#define opc_if_icmpge		0xa2
+#define opc_if_icmpgt		0xa3
+#define opc_if_icmple		0xa4
+#define opc_if_acmpeq		0xa5
+#define opc_if_acmpne		0xa6
+#define opc_goto		0xa7
+#define opc_jsr			0xa8
+#define opc_ret			0xa9
+#define opc_tableswitch		0xaa
+#define opc_lookupswitch	0xab
+#define opc_ireturn		0xac
+#define opc_lreturn		0xad
+#define opc_freturn		0xae
+#define opc_dreturn		0xaf
+#define opc_areturn		0xb0
+#define opc_return		0xb1
+#define opc_getstatic		0xb2
+#define opc_putstatic		0xb3
+#define opc_getfield		0xb4
+#define opc_putfield		0xb5
+#define opc_invokevirtual	0xb6
+#define opc_invokespecial	0xb7
+#define opc_invokestatic	0xb8
+#define opc_invokeinterface	0xb9
+#define opc_invokedynamic	0xba
+#define opc_new			0xbb
+#define opc_newarray		0xbc
+#define opc_anewarray		0xbd
+#define opc_arraylength		0xbe
+#define opc_athrow		0xbf
+#define opc_checkcast		0xc0
+#define opc_instanceof		0xc1
+#define opc_monitorenter	0xc2
+#define opc_monitorexit		0xc3
+#define opc_wide		0xc4
+#define opc_multianewarray	0xc5
+#define opc_ifnull		0xc6
+#define opc_ifnonnull		0xc7
+#define opc_goto_w		0xc8
+#define opc_jsr_w		0xc9
+#define opc_breakpoint		0xca
+
+#define OPC_LAST_JAVA_OP	0xca
+
+#define opc_fast_aldc		Bytecodes::_fast_aldc
+#define opc_fast_aldc_w		Bytecodes::_fast_aldc_w
+
+#define opc_bgetfield			0xcc
+#define opc_cgetfield			0xcd
+#define opc_igetfield			0xd0
+#define opc_lgetfield			0xd1
+#define opc_sgetfield			0xd2
+#define opc_aputfield			0xd3
+#define opc_bputfield			0xd4
+#define opc_cputfield			0xd5
+#define opc_iputfield			0xd8
+#define opc_lputfield			0xd9
+#define opc_iaccess_0			0xdb
+#define opc_iaccess_1			0xdc
+#define opc_iaccess_2			0xdd
+#define opc_iaccess_3			0xde
+#define opc_invokeresolved		0xdf
+#define opc_invokespecialresolved	0xe0
+#define opc_invokestaticresolved	0xe1
+#define opc_invokevfinal		0xe2
+#define opc_iload_iload			0xe3
+
+#define opc_return_register_finalizer   0xe7
+#define opc_dmac                        0xe8
+#define opc_iload_0_iconst_N            0xe9
+#define opc_iload_1_iconst_N            0xea
+#define opc_iload_2_iconst_N            0xeb
+#define opc_iload_3_iconst_N            0xec
+#define opc_iload_iconst_N              0xed
+#define opc_iadd_istore_N               0xee
+#define opc_isub_istore_N               0xef
+#define opc_iand_istore_N               0xf0
+#define opc_ior_istore_N                0xf1
+#define opc_ixor_istore_N               0xf2
+#define opc_iadd_u4store                0xf3
+#define opc_isub_u4store                0xf4
+#define opc_iand_u4store                0xf5
+#define opc_ior_u4store                 0xf6
+#define opc_ixor_u4store                0xf7
+#define opc_iload_0_iload               0xf8
+#define opc_iload_1_iload               0xf9
+#define opc_iload_2_iload               0xfa
+#define opc_iload_3_iload               0xfb
+#define opc_iload_0_iload_N             0xfc
+#define opc_iload_1_iload_N             0xfd
+#define opc_iload_2_iload_N             0xfe
+#define opc_iload_3_iload_N             0xff
+
+
+#define H_IREM				0
+#define H_IDIV				1
+#define H_LDIV				2
+#define H_LREM				3
+#define H_FREM				4
+#define H_DREM				5
+#define	H_LDC				6
+#define H_NEW				8
+#define H_I2F				9
+#define H_I2D				10
+#define H_L2F				11
+#define H_L2D				12
+#define H_F2I				13
+#define H_F2L				14
+#define H_F2D				15
+#define H_D2I				16
+#define H_D2L				17
+#define H_D2F				18
+#define H_NEWARRAY			19
+#define H_ANEWARRAY			20
+#define H_MULTIANEWARRAY		21
+#define H_INSTANCEOF			22
+#define H_CHECKCAST			23
+#define H_AASTORE			24
+#define H_APUTFIELD			25
+#define H_SYNCHRONIZED_ENTER		26
+#define H_SYNCHRONIZED_EXIT		27
+
+#define H_EXIT_TO_INTERPRETER		28
+
+#define H_RET				H_EXIT_TO_INTERPRETER
+#define H_DEADCODE			H_EXIT_TO_INTERPRETER
+#define H_ATHROW			H_EXIT_TO_INTERPRETER
+
+#define H_HANDLE_EXCEPTION		29
+#define H_ARRAYBOUND			30
+
+#define H_LDC_W				31
+
+#define H_DEBUG_METHODENTRY		32
+#define H_DEBUG_METHODEXIT		33
+#define H_DEBUG_METHODCALL		34
+
+#define H_INVOKEINTERFACE		35
+#define H_INVOKEVIRTUAL			36
+#define H_INVOKESTATIC			37
+#define H_INVOKESPECIAL			38
+
+#define H_GETFIELD_WORD			39
+#define H_GETFIELD_SH			40
+#define H_GETFIELD_H			41
+#define H_GETFIELD_SB			42
+#define H_GETFIELD_DW			43
+
+#define H_PUTFIELD_WORD			44
+#define H_PUTFIELD_H			45
+#define H_PUTFIELD_B			46
+#define H_PUTFIELD_A			47
+#define H_PUTFIELD_DW			48
+
+#define H_GETSTATIC_WORD		49
+#define H_GETSTATIC_SH			50
+#define H_GETSTATIC_H			51
+#define H_GETSTATIC_SB			52
+#define H_GETSTATIC_DW			53
+
+#define H_PUTSTATIC_WORD		54
+#define H_PUTSTATIC_H			55
+#define H_PUTSTATIC_B			56
+#define H_PUTSTATIC_A			57
+#define H_PUTSTATIC_DW			58
+
+#define H_STACK_OVERFLOW		59
+
+#define H_HANDLE_EXCEPTION_NO_REGS	60
+
+#define H_INVOKESTATIC_RESOLVED		61
+#define H_INVOKESPECIAL_RESOLVED	62
+#define H_INVOKEVIRTUAL_RESOLVED	63
+#define H_INVOKEVFINAL			64
+
+#define H_MONITORENTER			65
+#define H_MONITOREXIT			66
+
+#define H_SAFEPOINT              	67
+
+#define H_LAST                          68  // Not used
+
+unsigned handlers[H_LAST];
+
+#define LEAF_STACK_SIZE			200
+#define STACK_SPARE			40
+
+#define COMPILER_RESULT_FAILED	1	// Failed to compile this method
+#define COMPILER_RESULT_FATAL	2	// Fatal - don't try to compile anything ever again
+
+#include <setjmp.h>
+
+static jmp_buf compiler_error_env;
+
+#define J_BogusImplementation() longjmp(compiler_error_env, COMPILER_RESULT_FAILED)
+
+#ifdef PRODUCT
+
+//#define JASSERT(cond, msg)	
+//#define J_Unimplemented() longjmp(compiler_error_env, COMPILER_RESULT_FATAL)
+#define JASSERT(cond, msg)	do { if (!(cond)) fatal(msg); } while (0)
+#define J_Unimplemented()       { report_unimplemented(__FILE__, __LINE__); BREAKPOINT; }
+#define JDEBUG_( _j_ )        
+
+#else
+
+#define JASSERT(cond, msg)	do { if (!(cond)) fatal(msg); } while (0)
+#define J_Unimplemented()       { report_unimplemented(__FILE__, __LINE__); BREAKPOINT; }
+#define JDEBUG_( _j_ )          _j_
+
+#endif // PRODUCT
+
+#define GET_NATIVE_U2(p)	(*(unsigned short *)(p))
+#define GET_NATIVE_U4(p)	(*(unsigned *)(p))
+
+#define GET_JAVA_S1(p)		(((signed char *)(p))[0])
+#define GET_JAVA_S2(p)  	((((signed char *)(p))[0] << 8) + (p)[1])
+#define GET_JAVA_U2(p)		(((p)[0] << 8) + (p)[1])
+#define GET_JAVA_U4(p)		(((p)[0] << 24) + ((p)[1] << 16) + ((p)[2] << 8) + (p)[3])
+
+#define BYTESEX_REVERSE(v) (((v)<<24) | (((v)<<8) & 0xff0000) | (((v)>>8) & 0xff00) | ((v)>>24))
+#define BYTESEX_REVERSE_U2(v) (((v)<<8) | ((v)>>8))
+
+// n.b. this value is chosen because it is an illegal thumb2 instruction
+#define THUMB2_POLLING_PAGE_MAGIC 0xdead
+#define ARM_POLLING_PAGE_MAGIC    0xf0bef0be
+
+typedef struct Thumb2_CodeBuf {
+  unsigned size;
+  char *sp;
+  char *hp;
+} Thumb2_CodeBuf;
+
+Thumb2_CodeBuf *thumb2_codebuf;
+
+// We support any arch >= ARM v6. The differences in the archs are:-
+// ARMv6: 	No Thumb2 support, No LDREXD/STREXD
+// ARMv6k:	No Thumb2 support, LDREXD/STREXD
+// >=ARMv6t2:	Thumb2 support, LDREXD/STREXD
+// Note that support for vanilla ARMv6 (as on the Raspberry Pi) is only available
+// on non-MP systems because of the lack of the necessary LDREXD/STREXD
+// instructions; fortunately the Raspberry Pi is single core.
+#define ARCH_GE_V6(cpuinfo)	(((cpuinfo) & 0xffff) >= (1<<6))
+#define ARCH_IS_V6(cpuinfo)	(((cpuinfo) & 0xffff) == (1<<6))
+#define ARCH_GE_V6T2(cpuinfo)   (ARCH_GE_V6(cpuinfo) && ((cpuinfo) & ARCH_THUMB2))
+#define ARCH_GE_V6K(cpuinfo)	(ARCH_GE_V6T2(cpuinfo) || ARCH_IS_V6(cpuinfo) && os::is_MP())
+
+extern unsigned CPUInfo;
+
+unsigned bc_stackinfo[THUMB2_MAX_BYTECODE_SIZE];
+unsigned locals_info[1000];
+unsigned stack[1000];
+unsigned r_local[1000];
+
+#ifdef THUMB2_JVMTI
+// jvmti needs to map start address of generated code for a bytecode
+// to corresponding bytecode index so agents can correlate code address
+// ranges with bci and thence line number
+static jvmtiAddrLocationMap *address_bci_map = NULL;
+static jint address_bci_map_length = 0;
+
+static void *stub_gen_code_start = 0;
+static void *stub_gen_code_end = 0;
+
+// function used to lazily initialize the address to bci translation map
+// the first time a compiled method is generated.
+static void address_bci_map_init(JavaThread *thread)
+{
+  // the dynamic_code_generated event posted to notify generation of
+  // the stub code has to be posted lazily because generation happens
+  // in Thumb2_Initialize under bci_init and the latter is called as a
+  // side-effect of loading libjvm.o. we don't have a Java thread at
+  // that point nor, indeed, any agents to catch the notify. so the
+  // info cached by Thumb2_Initialize needs to be posted when the
+  // first compiled method load event is notified, at which point we
+  // will indeed have a current thread.
+
+  {
+    // a thread transition from in Java to in VM is required before
+    // calling into Jvmti
+
+    ThreadInVMfromJava transition(thread);
+
+    JvmtiExport::post_dynamic_code_generated("thumb2_dynamic_stubs_block",
+					   stub_gen_code_start,
+					   stub_gen_code_end);
+
+    // n.b. exiting this block reverts the thread state to in Java
+  }
+  
+
+  // the map is lazily allocated so we don't use the space unless we
+  // are actually using the JIT
+
+  // at worst we need a start address for every bytecode so
+  // the map size is limited by the compiler's bytecode limit
+  address_bci_map = new jvmtiAddrLocationMap[THUMB2_MAX_BYTECODE_SIZE];
+}
+
+// clear the address to bci translation map
+static void address_bci_map_reset(JavaThread *thread)
+{
+  // this only gets called after obtaining the compiler lock so there
+  // is no need to worry about races
+  
+  if (address_bci_map == NULL) {
+    address_bci_map_init(thread);
+  }
+
+  // this effectively clears the previous map
+
+  address_bci_map_length = 0;
+}
+
+// add an entry to the address to bci translation map
+// this will never exceed the available space
+static void address_bci_map_add(void *addr, unsigned bci)
+{
+    address_bci_map[address_bci_map_length].start_address = addr;
+    address_bci_map[address_bci_map_length].location = bci;
+    address_bci_map_length++;
+}
+#endif // THUMB2_JVMTI
+
+#ifdef T2_PRINT_DISASS
+
+short start_bci[THUMB2_MAX_T2CODE_SIZE];
+#define START_BCI(idx) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? start_bci[idx] : -1)
+#define SET_START_BCI(idx, bci) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? (start_bci[idx] = (bci)) : 0)
+
+short end_bci[THUMB2_MAX_T2CODE_SIZE];
+#define END_BCI(idx) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? end_bci[idx] : -1)
+#define SET_END_BCI(idx, bci) (((idx) < THUMB2_MAX_T2CODE_SIZE) ? (end_bci[idx] = (bci)) : 0)
+
+#else
+
+#define START_BCI(idx)	-1
+#define SET_START_BCI(idx, bci) 0
+#define END_BCI(idx)	-1
+#define SET_END_BCI(idx, bci) 0
+
+#endif
+
+bool DebugSwitch = false;
+
+// XXX hardwired constants!
+#define ENTRY_FRAME             1
+#define INTERPRETER_FRAME       2
+#define SHARK_FRAME             3
+#define FAKE_STUB_FRAME         4
+
+#include "offsets_arm.s"
+
+#define BC_FLAGS_MASK		0xf0000000
+#define BC_VISITED		0x80000000
+#define BC_BRANCH_TARGET	0x40000000
+#define BC_COMPILED		0x20000000
+#define BC_BACK_TARGET		0x10000000
+
+#define IS_DEAD(x)	(((x) & BC_VISITED) == 0)
+
+#define LOCAL_MODIFIED		31
+#define LOCAL_REF		30
+#define LOCAL_DOUBLE		29
+#define LOCAL_FLOAT		28
+#define LOCAL_LONG		27
+#define LOCAL_INT		26
+#define LOCAL_ALLOCATED		25
+
+#define LOCAL_COUNT_BITS	10
+#define LOCAL_READ_POS		0
+#define LOCAL_WRITE_POS		LOCAL_COUNT_BITS
+
+#define LOCAL_READS(x)		(((x) >> LOCAL_READ_POS) & ((1<<LOCAL_COUNT_BITS)-1))
+#define LOCAL_WRITES(x)		(((x) >> LOCAL_WRITE_POS) & ((1<<LOCAL_COUNT_BITS)-1))
+#define LOCAL_SET_COUNTS(r, w)	(((r) << LOCAL_READ_POS) | (((w) << LOCAL_WRITE_POS)))
+#define LOCAL_INC_COUNT(c)	((c) < ((1<<LOCAL_COUNT_BITS)-1) ? (c)+1 : (c))
+
+#define STACK_REGS	4
+#define FP_STACK_REGS	4
+
+typedef unsigned	u32;
+typedef unsigned	Reg;
+
+#define	ARM_R0		0
+#define ARM_R1		1
+#define ARM_R2		2
+#define ARM_R3		3
+#define ARM_R4		4
+#define ARM_R5		5
+#define ARM_R6		6
+#define ARM_R7		7
+#define ARM_R8		8
+#define ARM_R9		9
+#define ARM_R10		10
+#define ARM_R11		11
+#define ARM_IP		12
+#define ARM_SP		13
+#define ARM_LR		14
+#define ARM_PC		15
+#define ARM_CPSR	16	// CPSR in sigcontext
+#define ARM_FAULT	17	// fault address in sigcontext
+
+#define CPSR_THUMB_BIT	(1<<5)
+
+#define VFP_S0		32
+#define VFP_S1		33
+#define VFP_S2		34
+#define VFP_S3		35
+#define VFP_S4		36
+#define VFP_S5		37
+#define VFP_S6		38
+#define VFP_S7		39
+
+#define VFP_D0		64
+#define VFP_D1		65
+#define VFP_D2		66
+#define VFP_D3		67
+#define VFP_D4		68
+#define VFP_D5		69
+#define VFP_D6		70
+#define VFP_D7		71
+
+#define PREGS	6
+
+#define JAZ_V1	ARM_R5
+#define JAZ_V2	ARM_R6
+#define JAZ_V3	ARM_R7
+#define JAZ_V4	ARM_R8
+#define JAZ_V5	ARM_R9
+#define JAZ_V6	ARM_R11
+
+#define Rstack		ARM_R4
+#define Rlocals		ARM_R7
+#define Ristate		ARM_R8
+#define Rthread		ARM_R10
+
+#define Rint_jpc	ARM_R5
+
+#define IS_ARM_INT_REG(r) ((r) <= ARM_PC)
+#define IS_ARM_FP_REG(r) (!IS_ARM_INT_REG(r))
+
+#define I_REGSET	((1<<ARM_R4) | (1<<ARM_R5) | (1<<ARM_R6) | (1<<ARM_R7) | \
+			 (1<<ARM_R9) | (1<<ARM_R10) | (1<<ARM_R11))
+#define C_REGSET	(1<<ARM_R8)
+
+#define LOG2(n) binary_log2(n)
+
+unsigned binary_log2(unsigned n)
+{
+  unsigned r = 0;
+  if ((n & 0xffff) == 0) r = 16, n >>= 16;
+  if ((n & 0xff) == 0) r += 8, n >>= 8;
+  if ((n & 0xf) == 0) r += 4, n >>= 4;
+  if ((n & 3) == 0) r += 2, n >>= 2;
+  if ((n & 1) == 0) r += 1;
+  return r;
+}
+
+typedef struct Compiled_Method {
+    // All entry points aligned on a cache line boundary
+    //		.align	CODE_ALIGN
+    // slow_entry:				@ callee save interface
+    // 		push	{r4, r5, r6, r7, r9, r10, r11, lr}
+    // 		mov	Rthread, r2
+    // 		bl	fast_entry
+    // 		pop	{r4, r5, r6, r7, r9, r10, r11, pc}
+    unsigned slow_entry[4];
+    unsigned *osr_table;			// pointer to the osr table
+    unsigned *exception_table;
+    Compiled_Method *next;
+    // The next 6 halfwords give the register mapping for JAZ_V1 to JAZ_V6.
+    // This is used when recovering from an exception so we can push
+    // the registers back into the local variables pool.
+    short regusage[6];
+    unsigned header_end[1];
+    // fast_entry:
+    // 		push	{r8, lr}
+    // 		...	@ The compiled code
+    // 		pop	{r8, pc}
+    // 		.align	WORD_ALIGN
+    // code_handle:				@ from interpreted entry
+    // 		.word	slow_entry		@ bottom bit must be set!
+    // osr_table:
+    // 		.word	<no. of entries>
+    // @@@ For bytecode 0 and for each backwards branch target
+    // 		.short	<bytecode index>
+    // 		.short	<code offset>		@ offset in halfwords from slow_entry
+} Compiled_Method;
+
+Compiled_Method *compiled_method_list = 0;
+Compiled_Method **compiled_method_list_tail_ptr = &compiled_method_list;
+
+typedef struct Thumb2_Entrypoint {
+  unsigned compiled_entrypoint;
+  short *regusage;
+} Thumb2_Entrypoint;
+
+typedef struct CodeBuf {
+    unsigned short *codebuf;
+    unsigned idx;
+    unsigned limit;
+} CodeBuf;
+
+typedef struct Thumb2_Stack {
+    unsigned *stack;
+    unsigned depth;
+} Thumb2_Stack;
+
+#define IS_SREG(r) ((r) < STACK_REGS)
+
+typedef struct Thumb2_Registers {
+    unsigned *r_local;
+    unsigned npregs;
+    unsigned pregs[PREGS];
+    int mapping[PREGS];
+} Thumb2_Registers;
+
+typedef struct Thumb2_Info {
+    JavaThread *thread;
+    methodOop method;
+    unsigned *bc_stackinfo;
+    unsigned *locals_info;
+    jubyte *code_base;
+    unsigned code_size;
+    CodeBuf *codebuf;
+    Thumb2_Stack *jstack;
+    Thumb2_Registers *jregs;
+    unsigned compiled_return;
+    unsigned compiled_word_return[12];  // R0 .. R11
+    unsigned is_leaf;
+    unsigned use_istate;
+} Thumb2_Info;
+
+#define IS_INT_SIZE_BASE_TYPE(c) (c=='B' || c=='C' || c=='F' || c=='I' || c=='S' || c=='Z')
+#define IS_INT_SIZE_TYPE(c) (IS_INT_SIZE_BASE_TYPE(c) || c == 'L' || c == '[')
+
+void Thumb2_save_local_refs(Thumb2_Info *jinfo, unsigned stackdepth);
+void Thumb2_restore_local_refs(Thumb2_Info *jinfo, unsigned stackdepth);
+void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth);
+
+static int method_stackchange(const jbyte *base)
+{
+  jbyte c;
+  int stackchange = 0;
+
+  c = *base++;
+  JASSERT(c == '(', "Invalid signature, missing '('");
+  while ((c = *base++) != ')') {
+    stackchange -= 1;
+    if (c == 'J' || c == 'D') {
+      stackchange -= 1;
+    } else if (c == '[') {
+      do { c = *base++; } while (c == '[');
+      if (c == 'L')
+	do { c = *base++; } while (c != ';');
+    } else if (c == 'L') {
+      do { c = *base++; } while (c != ';');
+    } else {
+      JASSERT(IS_INT_SIZE_BASE_TYPE(c), "Invalid signature, bad arg type");
+    }
+  }
+  JASSERT(c == ')', "Invalid signature, missing ')'");
+  c = *base++;
+  if (c == 'J' || c == 'D') stackchange += 2;
+  else if (c != 'V') {
+    stackchange += 1;
+    JASSERT(IS_INT_SIZE_TYPE(c), "Invalid signature, bad ret type");
+  }
+  return stackchange;
+}
+
+static void Thumb2_local_info_from_sig(Thumb2_Info *jinfo, methodOop method,
+				       const jbyte *base)
+{
+  jbyte c;
+  unsigned arg = 0;
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned local_info;
+
+  if (!method->is_static()) locals_info[arg++] = 1 << LOCAL_REF;
+  c = *base++;
+  JASSERT(c == '(', "Invalid signature, missing '('");
+  while ((c = *base++) != ')') {
+    local_info = 1 << LOCAL_INT;
+    if (c == 'J') local_info = 1 << LOCAL_LONG;
+    else if (c == 'D') local_info = 1 << LOCAL_DOUBLE;
+    else if (c == '[') {
+      local_info = 1 << LOCAL_REF;
+      do { c = *base++; } while (c == '[');
+      if (c == 'L')
+	do { c = *base++; } while (c != ';');
+    } else if (c == 'L') {
+      local_info = 1 << LOCAL_REF;
+      do { c = *base++; } while (c != ';');
+    } else {
+      JASSERT(IS_INT_SIZE_BASE_TYPE(c), "Invalid signature, bad arg type");
+    }
+    locals_info[arg++] = local_info;
+  }
+}
+
+#define UNDEFINED_32	0xf7f0a000
+#define UNDEFINED_16	0xde00
+
+static const char *local_types[] = { "int", "long", "float", "double", "ref" };
+
+#ifdef T2_PRINT_DISASS
+
+class Hsdis {
+public:
+
+  typedef void* (*decode_instructions_event_callback_ftype)  (void*, const char*, void*);
+
+  typedef void* (*decode_instructions_ftype)
+    (void* start, void* end,
+     decode_instructions_event_callback_ftype event_callback,
+     void* event_stream,
+     void* printf_callback,
+     void* printf_stream,
+     const char* options);
+
+  decode_instructions_ftype decode_instructions;
+
+  void *lib;
+
+  // Load hsdis-arm.so lazily.
+  Hsdis()
+  {
+    decode_instructions = NULL;
+
+    if (PrintAssembly) {
+      if (lib = dlopen("hsdis-arm.so", RTLD_NOW)) {
+	decode_instructions
+	  = (typeof decode_instructions)dlsym(lib, "decode_instructions");
+      }
+
+      if (! (decode_instructions)) {
+	fprintf (stderr, "PrintAssembly (or T2_PRINT_DISASS) is set, but\n"
+		 "hsdis-arm.so has not been found or is invalid.  If you want to\n"
+		 "see a disassembly, please ensure that a valid copy of\n"
+		 "hsdis-arm.so is present somewhere in your library load path.\n");
+	abort();
+      }
+    }
+  }
+};
+
+static void *print_address(void *stream, const char *tag, void *data);
+
+void Thumb2_disass(Thumb2_Info *jinfo)
+{
+  unsigned code_size = jinfo->code_size;
+  jubyte *code_base = jinfo->code_base;
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned nlocals = jinfo->method->max_locals();
+  int bci = 0;
+  int last_bci = -1;
+  int start_b, end_b;
+  unsigned nodisass;
+
+  unsigned short *codebuf = jinfo->codebuf->codebuf;
+  unsigned idx, compiled_len;
+
+  static Hsdis hsdis;
+
+  fflush(stdout);
+  fflush(stderr);
+
+  compiled_len = jinfo->codebuf->idx * 2;
+  for (idx = 0; idx < compiled_len; ) {
+    nodisass = 0;
+    start_b = START_BCI(idx/2);
+    end_b = END_BCI(idx/2);
+    if (start_b != -1) {
+      last_bci = -1;
+      for (bci = start_b; bci < end_b; ) {
+	unsigned stackinfo = bc_stackinfo[bci];
+	unsigned opcode;
+	int len;
+
+	if (stackinfo & BC_BRANCH_TARGET)
+	  fprintf(stderr, "----- Basic Block -----\n");
+	JASSERT(bci > last_bci, "disass not advancing");
+	last_bci = bci;
+	fprintf(stderr, "%c%4d : ", (stackinfo & BC_VISITED) ? ' ' : '?', bci);
+	opcode = code_base[bci];
+	if (opcode > OPC_LAST_JAVA_OP) {
+	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	}
+	len = Bytecodes::length_for((Bytecodes::Code)opcode);
+	if (len <= 0) {
+	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	  len = (Bytecodes::special_length_at
+		 (code,
+		  (address)(code_base+bci), (address)(code_base+code_size)));
+	}
+	switch (opcode) {
+	  case opc_tableswitch: {
+	    int nbci = (bci & ~3) + 4;
+	    int low, high;
+	    unsigned w;
+	    unsigned *table;
+	    int def;
+	    unsigned n, i;
+
+	    fprintf(stderr, "%02x ", opcode);
+	    for (int i = 1; i < 5; i++)
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
+	    w = *(unsigned int *)(code_base + nbci + 4);
+	    low = (int)BYTESEX_REVERSE(w);
+	    w = *(unsigned int *)(code_base + nbci + 8);
+	    high = (int)BYTESEX_REVERSE(w);
+	    w = *(unsigned int *)(code_base + nbci + 0);
+	    def = (int)BYTESEX_REVERSE(w);
+	    table = (unsigned int *)(code_base + nbci + 12);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tlow:\t\t0x%08x\n", low);
+	    fprintf(stderr, "\thigh:\t\t0x%08x\n", high);
+	    n = high - low + 1;
+	    while (low <= high) {
+	      int off;
+
+	      w = *table++;
+	      off = (int)BYTESEX_REVERSE(w);
+	      fprintf(stderr, "\toffset %d:\t0x%08x\n", low, off);
+	      low++;
+	    }
+	    bci += len;
+	    if (Thumb2) {
+	      unsigned short *start = codebuf + idx/2;
+	      unsigned short *p = start;
+              while ((*p++ >> 4) != 0xe8d); p++;
+	      hsdis.decode_instructions((char*)start,
+					(char *)p,
+					print_address, NULL, NULL, stderr,
+					Thumb2 ? "force-thumb" : "");
+              idx += (p - start) * 2;
+	    } else {
+              unsigned *start = (unsigned *)((char *)codebuf + idx);
+              unsigned *p = start;
+              // skip to add pc, pc, ip, lsl #1
+              while (*p++ != 0xe08ff08c);
+	      hsdis.decode_instructions((char*)start,
+					(char *)p,
+					print_address, NULL, NULL, stderr,
+					Thumb2 ? "force-thumb" : "");
+              idx += (p - start) * 4;
+            }
+	    for (i = 0; i < n; i++) {
+	      fprintf(stderr, "0x%08x:\t.short\t0x%04x\n", (int)codebuf+idx, *(unsigned short *)((int)codebuf + idx));
+	      idx += 2;
+	    }
+            if (!Thumb2 && (idx & 2)) {
+	      fprintf(stderr, "0x%08x:\t.pad\t0x%04x\n", (int)codebuf+idx, *(unsigned short *)((int)codebuf + idx));
+              idx += 2;
+            }
+	    {
+	      char *p = (char*)codebuf + idx;
+	      int len = 0;
+	      while (len + idx < compiled_len
+		     && START_BCI((len + idx)/2) == -1)
+		len += 2;
+	      hsdis.decode_instructions((char*)p, (char*)p + len,
+					  print_address, NULL, NULL, stderr,
+					  Thumb2 ? "force-thumb" : "");
+	    }
+	    nodisass = 1;
+	    break;
+	  }
+	  case opc_lookupswitch: {
+	    unsigned w;
+	    unsigned nbci = (bci & ~3) + 4;;
+	    int def;
+	    int npairs;	// The Java spec says signed but must be >= 0??
+	    unsigned *table;
+
+	    fprintf(stderr, "%02x ", opcode);
+	    for (int i = 1; i < 5; i++)
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
+
+	    w = *(unsigned int *)(code_base + nbci + 0);
+	    def = (int)BYTESEX_REVERSE(w);
+	    w = *(unsigned int *)(code_base + nbci + 4);
+	    npairs = (int)BYTESEX_REVERSE(w);
+	    table = (unsigned int *)(code_base + nbci + 8);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tnpairs:\t\t0x%08x\n", npairs);
+	    for (int i = 0; i < npairs; i++) {
+	      unsigned match, off;
+	      w = table[0];
+	      match = BYTESEX_REVERSE(w);
+	      w = table[1];
+	      table += 2;
+	      off = BYTESEX_REVERSE(w);
+	      fprintf(stderr, "\t  match: 0x%08x, offset: 0x%08x\n", match, off);
+	    }
+	    break;
+	  }
+
+	  default:
+	    for (int i = 0; i < 5; i++) {
+	      if (i < len)
+		fprintf(stderr, "%02x ", code_base[bci+i]);
+	      else
+		fprintf(stderr, "   ");
+	    }
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)code_base[bci]));
+	    break;
+	}
+	bci += len;
+      }
+    }
+    if (!nodisass) {
+      {
+	int len;
+	unsigned s1, s2;
+
+	s1 = *(unsigned short *)((int)codebuf + idx);
+	s2 = *(unsigned short *)((int)codebuf + idx + 2);
+	if (s1 == UNDEFINED_16 || ((s1 << 16) + s2) == UNDEFINED_32) {
+	  if (s1 == UNDEFINED_16) {
+	    fprintf(stderr, "undefined (0xde00) - UNPATCHED BRANCH???");
+	    len = 2;
+	  } else {
+	    fprintf(stderr, "undefined (0xf7f0a000) - UNPATCHED BRANCH???");
+	    len = 4;
+	  }
+	} else {
+	  char *p = (char*)codebuf + idx;
+	  len = 2;
+	  while (len + idx < compiled_len
+		 && START_BCI((len + idx)/2) == -1)
+	    len += 2;
+	  hsdis.decode_instructions((char*)p, (char*)p + len,
+				      print_address, NULL, NULL, stderr,
+				      Thumb2 ? "force-thumb" : "");
+	}
+	idx += len;
+      }
+    }
+  }
+  fflush(stderr);
+}
+// where
+static void *print_address(void *, const char *tag, void *data) {
+  if (strcmp(tag, "insn") == 0)
+    fprintf(stderr, "0x%08x:\t", (unsigned int)data);
+  return NULL;
+}
+#endif // T2_PRINT_DISASS
+
// Pack the static attributes of one bytecode into a 32-bit descriptor.
// Bit layout (see the accessor macros below):
//   bits  0-2   len        - instruction length in bytes (0 = variable length)
//   bits  3-5   pop        - stack words popped
//   bits  6-8   push       - stack words pushed
//   bits  9-10  local_n    - implicit local index (for the *load_N/*store_N forms)
//   bits 11-13  local_type - BCI_TYPE_* of the local accessed
//   bit  28     isstore    - bytecode writes a local
//   bit  29     islocal_n  - local index is implicit in the opcode itself
//   bit  30     islocal    - bytecode accesses a local variable
//   bit  31     special    - not table-driven; decoded by hand in Thumb2_pass1
#define BCI(len, pop, push, special, islocal, islocal_n, isstore, local_n, local_type) \
	((len) | ((pop)<<3) | ((push)<<6) | (unsigned)((special) << 31) | ((islocal) << 30) | ((islocal_n) << 29) | ((isstore) << 28) | ((local_n) << 9) | ((local_type) << 11))

// Field extractors for a packed descriptor.
#define BCI_LEN(x) 	((x) & 7)
#define BCI_POP(x) 	(((x)>>3) & 7)
#define BCI_PUSH(x) 	(((x)>>6) & 7)
#define BCI_LOCAL_N(x)	(((x)>>9) & 3)
#define BCI_LOCAL_TYPE(x) (((x) >> 11) & 7)

// Values for the local_type field.
#define BCI_TYPE_INT	0
#define BCI_TYPE_LONG	1
#define BCI_TYPE_FLOAT	2
#define BCI_TYPE_DOUBLE	3
#define BCI_TYPE_REF	4

// Flag tests (non-zero when the corresponding bit is set).
#define BCI_SPECIAL(x) 	((x) & 0x80000000)
#define BCI_ISLOCAL(x)	((x) & 0x40000000)
#define BCI_ISLOCAL_N(x) ((x) & 0x20000000)
#define BCI_ISSTORE(x)	((x) & 0x10000000)
+
// Per-opcode attribute table, indexed by the (possibly rewritten) bytecode.
// Each entry is packed with BCI() above; entries with the 'special' bit set
// (branches, switches, invokes, returns, wide, ...) are decoded by hand in
// Thumb2_pass1.  Opcodes above opc_jsr_w are HotSpot interpreter-private
// rewritten bytecodes.
static const unsigned bcinfo[256] = {
	BCI(1, 0, 0, 0, 0, 0, 0, 0, 0),	// nop
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// aconst_null
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_m1
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_0
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_1
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_2
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_3
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_4
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_5
	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// lconst_0
	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// lconst_1
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_0
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_1
	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_2
	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// dconst_0
	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// dconst_1
	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// bipush
	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// sipush
	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// ldc
	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// ldc_w
	BCI(3, 0, 2, 0, 0, 0, 0, 0, 0),	// ldc2_w
	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload
	BCI(2, 0, 2, 0, 1, 0, 0, 0, BCI_TYPE_LONG),	// lload
	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_FLOAT),	// fload
	BCI(2, 0, 2, 0, 1, 0, 0, 0, BCI_TYPE_DOUBLE),	// dload
	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_REF),	// aload
	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0
	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1
	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2
	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3
	BCI(1, 0, 2, 0, 1, 1, 0, 0, BCI_TYPE_LONG),	// lload_0
	BCI(1, 0, 2, 0, 1, 1, 0, 1, BCI_TYPE_LONG),	// lload_1
	BCI(1, 0, 2, 0, 1, 1, 0, 2, BCI_TYPE_LONG),	// lload_2
	BCI(1, 0, 2, 0, 1, 1, 0, 3, BCI_TYPE_LONG),	// lload_3
	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_FLOAT),	// fload_0
	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_FLOAT),	// fload_1
	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_FLOAT),	// fload_2
	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_FLOAT),	// fload_3
	BCI(1, 0, 2, 0, 1, 1, 0, 0, BCI_TYPE_DOUBLE),	// dload_0
	BCI(1, 0, 2, 0, 1, 1, 0, 1, BCI_TYPE_DOUBLE),	// dload_1
	BCI(1, 0, 2, 0, 1, 1, 0, 2, BCI_TYPE_DOUBLE),	// dload_2
	BCI(1, 0, 2, 0, 1, 1, 0, 3, BCI_TYPE_DOUBLE),	// dload_3
	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_REF),	// aload_0
	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_REF),	// aload_1
	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_REF),	// aload_2
	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_REF),	// aload_3
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iaload
	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// laload
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// faload
	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// daload
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// aaload
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// baload
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// caload
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// saload
	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_INT),	// istore
	BCI(2, 2, 0, 0, 1, 0, 1, 0, BCI_TYPE_LONG),	// lstore
	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_FLOAT),	// fstore
	BCI(2, 2, 0, 0, 1, 0, 1, 0, BCI_TYPE_DOUBLE),	// dstore
	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_REF),	// astore
	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_INT),	// istore_0
	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_INT),	// istore_1
	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_INT),	// istore_2
	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_INT),	// istore_3
	BCI(1, 2, 0, 0, 1, 1, 1, 0, BCI_TYPE_LONG),	// lstore_0
	BCI(1, 2, 0, 0, 1, 1, 1, 1, BCI_TYPE_LONG),	// lstore_1
	BCI(1, 2, 0, 0, 1, 1, 1, 2, BCI_TYPE_LONG),	// lstore_2
	BCI(1, 2, 0, 0, 1, 1, 1, 3, BCI_TYPE_LONG),	// lstore_3
	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_FLOAT),	// fstore_0
	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_FLOAT),	// fstore_1
	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_FLOAT),	// fstore_2
	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_FLOAT),	// fstore_3
	BCI(1, 2, 0, 0, 1, 1, 1, 0, BCI_TYPE_DOUBLE),	// dstore_0
	BCI(1, 2, 0, 0, 1, 1, 1, 1, BCI_TYPE_DOUBLE),	// dstore_1
	BCI(1, 2, 0, 0, 1, 1, 1, 2, BCI_TYPE_DOUBLE),	// dstore_2
	BCI(1, 2, 0, 0, 1, 1, 1, 3, BCI_TYPE_DOUBLE),	// dstore_3
	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_REF),	// astore_0
	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_REF),	// astore_1
	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_REF),	// astore_2
	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_REF),	// astore_3
	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// iastore
	BCI(1, 4, 0, 0, 0, 0, 0, 0, 0),	// lastore
	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// fastore
	BCI(1, 4, 0, 0, 0, 0, 0, 0, 0),	// dastore
	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// aastore
	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// bastore
	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// castore
	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// sastore
	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// pop
	BCI(1, 2, 0, 0, 0, 0, 0, 0, 0),	// pop2
	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// dup
	BCI(1, 2, 3, 0, 0, 0, 0, 0, 0),	// dup_x1
	BCI(1, 3, 4, 0, 0, 0, 0, 0, 0),	// dup_x2
	BCI(1, 2, 4, 0, 0, 0, 0, 0, 0),	// dup2
	BCI(1, 3, 5, 0, 0, 0, 0, 0, 0),	// dup2_x1
	BCI(1, 4, 6, 0, 0, 0, 0, 0, 0),	// dup2_x2
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// swap
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ladd
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fadd
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dadd
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lsub
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fsub
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dsub
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// imul
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lmul
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fmul
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dmul
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// idiv
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ldiv
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fdiv
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ddiv
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// irem
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lrem
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// frem
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// drem
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// ineg
	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// lneg
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// fneg
	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// dneg
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ishl
	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lshl
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ishr
	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lshr
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iushr
	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lushr
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// land
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lor
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lxor
	BCI(3, 0, 0, 0, 1, 0, 1, 0, BCI_TYPE_INT),	// iinc
	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// i2l
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2f
	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// i2d
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// l2i
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// l2f
	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// l2d
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// f2i
	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// f2l
	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// f2d
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// d2i
	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// d2l
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// d2f
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2b
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2c
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2s
	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// lcmp
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fcmpl
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fcmpg
	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// dcmpl
	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// dcmpg
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifeq
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifne
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// iflt
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifge
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifgt
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifle
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpeq
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpne
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmplt
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpge
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpgt
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmple
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_acmpeq
	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_acmpne
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// goto
	BCI(3, 0, 1, 1, 0, 0, 0, 0, 0),	// jsr
	BCI(2, 0, 0, 1, 0, 0, 0, 0, 0),	// ret
	BCI(0, 1, 0, 1, 0, 0, 0, 0, 0),	// tableswitch
	BCI(0, 1, 0, 1, 0, 0, 0, 0, 0),	// lookupswitch
	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// ireturn
	BCI(1, 2, 0, 1, 0, 0, 0, 0, 0),	// lreturn
	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// freturn
	BCI(1, 2, 0, 1, 0, 0, 0, 0, 0),	// dreturn
	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// areturn
	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// return
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// getstatic
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// putstatic
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// getfield
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// putfield
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokevirtual
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokespecial
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokestatic
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokeinterface
	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// invokedynamic
	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// new
	BCI(2, 1, 1, 0, 0, 0, 0, 0, 0),	// newarray
	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// anewarray
	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// arraylength
	BCI(1, 1, 1, 1, 0, 0, 0, 0, 0),	// athrow
	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// checkcast
	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// instanceof
	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// monitorenter
	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// monitorexit
	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// wide
	BCI(4, 0, 0, 1, 0, 0, 0, 0, 0),	// multianewarray
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifnull
	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifnonnull
	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// goto_w
	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// jsr_w
	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// breakpoint
	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xcb
	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// bgetfield
	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// cgetfield
	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xce
	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xcf
	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// igetfield
	BCI(3, 1, 2, 0, 0, 0, 0, 0, 0),	// lgetfield
	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// sgetfield
	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// aputfield
	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// bputfield
	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// cputfield
	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xd6
	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xd7
	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// iputfield
	BCI(3, 3, 0, 0, 0, 0, 0, 0, 0),	// lputfield
	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xda
	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_REF),	// iaccess_0
	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_REF),	// iaccess_1
	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_REF),	// iaccess_2
	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_REF),	// iaccess_3
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokeresolved
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokespecialresolved
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokestaticresolved
	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokevfinal
	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iload
	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iload_N
	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// fast_aldc
	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// fast_aldc_w
	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// return_register_finalizer
	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dmac
	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iconst_N
	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iconst_N
	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iconst_N
	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iconst_N
	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iconst_N
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd_istore_N
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub_istore_N
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand_istore_N
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior_istore_N
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor_istore_N
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd_u4store
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub_u4store
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand_u4store
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior_u4store
	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor_u4store
	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iload
	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iload
	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iload
	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iload
	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iload_N
	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iload_N
	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iload_N
	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iload_N
};
+
// JIT pass 1: abstract interpretation of the bytecode starting at 'bci'
// with the given entry stack depth.  For every reachable bytecode index it
// records the Java expression stack depth and BC_VISITED in bc_stackinfo[],
// marks branch targets (BC_BRANCH_TARGET) and backward-branch targets
// (BC_BACK_TARGET), and accumulates per-local read/write counts and type
// bits in locals_info[] for Thumb2_RegAlloc.  The taken path of every
// conditional branch and every switch case is analysed by direct recursion;
// the linear walk stops at unconditional control transfers and whenever it
// rejoins an already-visited bci.
void Thumb2_pass1(Thumb2_Info *jinfo, unsigned stackdepth, unsigned bci)
{
  unsigned code_size = jinfo->code_size;
  jubyte *code_base = jinfo->code_base;
  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
  unsigned *locals_info = jinfo->locals_info;
  JDEBUG_ (
   Symbol *name = jinfo->method->name();
   Symbol *sig = jinfo->method->signature();
  );
  //constantPoolCacheOop cp = jinfo->method->constants()->cache();

  // The entry bci of each walk is by definition a branch target.
  bc_stackinfo[bci] |= BC_BRANCH_TARGET;
  while (bci < code_size) {
    unsigned stackinfo = bc_stackinfo[bci];
    unsigned bytecodeinfo;
    unsigned opcode;

    // Already analysed along another path - nothing more to learn here.
    if (stackinfo & BC_VISITED) break;
    JASSERT((int)stackdepth >= 0, "stackdepth < 0!!");
    // Record depth + visited while preserving the flag bits.
    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | stackdepth | BC_VISITED;
    opcode = code_base[bci];
//	printf("bci = 0x%04x, opcode = 0x%02x (%s)", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode));
    bytecodeinfo = bcinfo[opcode];
    if (!BCI_SPECIAL(bytecodeinfo)) {
      // Table-driven bytecode: length, stack effect and any local access
      // are fully described by the packed bcinfo[] entry.
      if (BCI_ISLOCAL(bytecodeinfo)) {
	unsigned local = BCI_LOCAL_N(bytecodeinfo);
	unsigned local_type = BCI_LOCAL_TYPE(bytecodeinfo) + LOCAL_INT;
	unsigned local_modified = 0;
	unsigned linfo;
	unsigned read_count, write_count;

	// Explicit-index forms (iload <n> etc.) carry the local in byte 1.
	if (!BCI_ISLOCAL_N(bytecodeinfo)) local = code_base[bci+1];
	if (BCI_ISSTORE(bytecodeinfo)) local_modified = 1U << LOCAL_MODIFIED;
	linfo = locals_info[local];
	read_count = LOCAL_READS(linfo);
	write_count = LOCAL_WRITES(linfo);
	if (local_modified)
	  write_count = LOCAL_INC_COUNT(write_count);
	else
	  read_count = LOCAL_INC_COUNT(read_count);
	
	locals_info[local] |= (1 << local_type) | LOCAL_SET_COUNTS(read_count, write_count) | local_modified;
	// Longs and doubles occupy two local slots; tag the second too.
	if (local_type == LOCAL_LONG || local_type == LOCAL_DOUBLE) {
	  locals_info[local+1] |= (1 << local_type) | LOCAL_SET_COUNTS(read_count, write_count) | local_modified;
	}
      }
      bci += BCI_LEN(bytecodeinfo);
      stackdepth += BCI_PUSH(bytecodeinfo) - BCI_POP(bytecodeinfo);
      JASSERT(stackdepth <= (unsigned)jinfo->method->max_stack(), "stack over/under flow?");
      continue;
    }

    // 'Special' bytecodes: control flow and anything whose stack effect
    // depends on operands or constant-pool entries.
    switch (opcode) {

      case opc_goto: {
	// Unconditional branch: continue the linear walk at the target.
	int off = GET_JAVA_S2(code_base+bci+1);
	bci += off;
	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
	break;
      }
      case opc_goto_w: {
	int off = GET_JAVA_U4(code_base+bci+1);
	bci += off;
	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
	break;
      }

      case opc_ifeq:
      case opc_ifne:
      case opc_iflt:
      case opc_ifge:
      case opc_ifgt:
      case opc_ifle:
      case opc_ifnull:
      case opc_ifnonnull: {
	// One-operand conditional: pop the tested value, recurse into the
	// taken path, then fall through to the next bytecode.
	int off = GET_JAVA_S2(code_base+bci+1);
	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
	stackdepth -= 1;
        Thumb2_pass1(jinfo, stackdepth, bci + off);
	bci += 3;
	break;
      }

      case opc_if_icmpeq:
      case opc_if_icmpne:
      case opc_if_icmplt:
      case opc_if_icmpge:
      case opc_if_icmpgt:
      case opc_if_icmple:
      case opc_if_acmpeq:
      case opc_if_acmpne: {
	// Two-operand compare-and-branch: pops both operands.
	int off = GET_JAVA_S2(code_base+bci+1);
	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
	stackdepth -= 2;
        Thumb2_pass1(jinfo, stackdepth, bci + off);
	bci += 3;
	break;
      }

      case opc_jsr: {
	// The subroutine sees the return address pushed (+1 depth).
	int off = GET_JAVA_S2(code_base+bci+1);
	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
	bci += 3;
	// NOTE(review): fall-through depth is reset to 0 here, while jsr_w
	// below leaves stackdepth unchanged - confirm this asymmetry is
	// intended.
	stackdepth = 0;
	break;
      }
      case opc_jsr_w: {
	int off = GET_JAVA_U4(code_base+bci+1);
	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
	bci += 5;
	break;
      }

      case opc_ireturn:
      case opc_lreturn:
      case opc_freturn:
      case opc_dreturn:
      case opc_areturn:
      case opc_return:
      case opc_return_register_finalizer:
      case opc_ret:
      case opc_athrow:
	// The test for BC_VISITED above will break out of the loop!!!
	break;

      case opc_tableswitch: {
	// Operands are 4-byte aligned after the opcode:
	//   default at nbci+4, low at +8, high at +12, jump table at +16.
	int low, high;
	unsigned w;
	unsigned *table;
	unsigned nbci;
	int def;

	stackdepth -= 1;	// pops the switch index
	nbci = bci & ~3;
	w = *(unsigned int *)(code_base + nbci + 8);
	low = (int)BYTESEX_REVERSE(w);
	w = *(unsigned int *)(code_base + nbci + 12);
	high = (int)BYTESEX_REVERSE(w);
	w = *(unsigned int *)(code_base + nbci + 4);
	def = (int)BYTESEX_REVERSE(w);
	table = (unsigned int *)(code_base + nbci + 16);

	// Analyse every case target, then continue at the default.
	while (low <= high) {
	  int off;
	  w = *table++;
	  off = (int)BYTESEX_REVERSE(w);
	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
	  Thumb2_pass1(jinfo, stackdepth, bci + off);
	  low++;
	}

	bci += def;
	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
	break;
      }

      case opc_lookupswitch: {
	// Aligned layout: default at nbci+4, npairs at +8, match/offset
	// pairs from +12.  'table' starts at +16, i.e. at the first pair's
	// OFFSET word, and steps by 2 words so only offsets are read.
	unsigned w;
	unsigned nbci;
	int def;
	int npairs;	// The Java spec says signed but must be >= 0??
	unsigned *table;

	stackdepth -= 1;	// pops the switch key
	nbci = bci & ~3;
	w = *(unsigned int *)(code_base + nbci + 4);
	def = (int)BYTESEX_REVERSE(w);
	w = *(unsigned int *)(code_base + nbci + 8);
	npairs = (int)BYTESEX_REVERSE(w);
	table = (unsigned int *)(code_base + nbci + 16);

	for (int i = 0; i < npairs; i++) {
	  int off;
	  w = *table;
	  table += 2;
	  off = (int)BYTESEX_REVERSE(w);
	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
	  Thumb2_pass1(jinfo, stackdepth, bci + off);
	}

	bci += def;
	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
	break;
      }

      case opc_getstatic:
      case opc_putstatic:
      case opc_getfield:
      case opc_putfield: {
	// Stack effect depends on the field type: first char of the
	// signature is 'J' (long) or 'D' (double) => two stack words.
	int index = GET_NATIVE_U2(code_base+bci+1);
	constantPoolOop pool = jinfo->method->constants();
	Symbol *sig = pool->signature_ref_at(index);
	const jbyte *base = sig->base();
	jbyte c = *base;
	int stackchange;

	opcode = code_base[bci];
	if (opcode == opc_getfield || opcode == opc_putfield)
	  stackdepth -= 1;	// the object reference
	stackchange = 1;
	if (c == 'J' || c == 'D') stackchange = 2;
	if (opcode == opc_getfield || opcode == opc_getstatic)
	  stackdepth += stackchange;
	else
	  stackdepth -= stackchange;
	bci += 3;
	break;
      }

      case opc_invokedynamic: {
	// Resolve the call-site's method signature via the secondary
	// constant-pool cache entry to get the net stack change.
	int site_index = GET_NATIVE_U4(code_base+bci+1);
	constantPoolOop pool = jinfo->method->constants();
	int main_index = pool->cache()->secondary_entry_at(site_index)->main_entry_index();
	JDEBUG_( int pool_index = pool->cache()->entry_at(main_index)->constant_pool_index(); );
	Symbol *sig = pool->signature_ref_at(main_index);
	const jbyte *base = sig->base();

	JDEBUG_( tty->print("InvokeDynamic %d: %s: %s %d %d\n", opcode, name->as_C_string(), sig->as_C_string(), main_index, pool_index); );
	stackdepth += method_stackchange(base);
	opcode = code_base[bci];
	bci += 5;
	break;
      }

      case opc_invokeresolved:
      case opc_invokespecialresolved:
      case opc_invokestaticresolved:
      case opc_invokevfinal:
      case opc_invokeinterface:
      case opc_invokevirtual:
      case opc_invokespecial:
      case opc_invokestatic: {
	// Net stack change comes from the callee signature; non-static
	// invokes additionally pop the receiver.
	int index = GET_NATIVE_U2(code_base+bci+1);
	constantPoolOop pool = jinfo->method->constants();
	Symbol *sig = pool->signature_ref_at(index);
	const jbyte *base = sig->base();

	jinfo->is_leaf = 0;	// this method makes calls
	JDEBUG_( tty->print("%d: %s: %s\n", opcode, name->as_C_string(), sig->as_C_string()); );
	stackdepth += method_stackchange(base);
	opcode = code_base[bci];
	bci += 3;
	// invokeinterface carries two extra operand bytes (count + zero).
	if (opcode == opc_invokeinterface) bci += 2;
	if (opcode != opc_invokestatic && opcode != opc_invokestaticresolved)
	  stackdepth -= 1;
	JDEBUG_( tty->print("invoke %d: %s: %s %d %d %d\n", opcode, name->as_C_string(), sig->as_C_string(),
	         bci, index, stackdepth); );
	break;
      }

      case opc_multianewarray:
	// Pops one dimension count per dimension (operand byte 3),
	// pushes the array reference.
	stackdepth = (stackdepth - code_base[bci+3]) + 1;
	bci += 4;
	break;

      case opc_wide: {
	// wide prefix: 16-bit local index; wide iinc also has a 16-bit
	// increment, hence length 6 vs 4.
	opcode = code_base[bci+1];
	if (opcode == opc_iinc) {
	  bci += 6;
	} else {
	  bci += 4;
	  if (opcode == opc_iload ||
	  	opcode == opc_fload || opcode == opc_aload)
	    stackdepth += 1;
	  else if (opcode == opc_lload || opcode == opc_dload)
	    stackdepth += 2;
	  else if (opcode == opc_istore ||
	  	opcode == opc_fstore || opcode == opc_astore)
	    stackdepth -= 1;
	  else if (opcode == opc_lstore || opcode == opc_dstore)
	    stackdepth -= 2;
	  else if (opcode != opc_ret)
	    fatal(err_msg("Undefined wide opcode %d\n", opcode));
	}
	break;
      }

      default:
	opcode = code_base[bci];
	fatal(err_msg("Undefined opcode %d\n", opcode));
	break;
    }
  }
}
+
+void Thumb2_RegAlloc(Thumb2_Info *jinfo)
+{
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned i, j;
+  unsigned linfo;
+  unsigned score, max_score;
+  unsigned local;
+  unsigned nlocals = jinfo->method->max_locals();
+  unsigned *pregs = jinfo->jregs->pregs;
+  unsigned npregs = jinfo->jregs->npregs;
+
+  for (i = 0; i < npregs; i++) jinfo->jregs->mapping[i] = -1;
+  for (i = 0; i < npregs; i++) {
+    if (jinfo->use_istate && pregs[i] == Ristate) continue;
+    max_score = 0;
+    for (j = 0; j < nlocals; j++) {
+      linfo = locals_info[j];
+
+      if (linfo & ((1<<LOCAL_ALLOCATED)|(1<<LOCAL_DOUBLE))) continue;
+      score = LOCAL_READS(linfo) + LOCAL_WRITES(linfo);
+      if (linfo & (1<<LOCAL_MODIFIED)) score = (score+1) >> 2;
+      if (linfo & (1<<LOCAL_REF)) score = score - (score >> 2);
+      if (linfo & (1<<LOCAL_LONG)) score = (score+1) >> 2;
+      if (score > max_score) max_score = score, local = j;
+    }
+    if (max_score < (OSPACE ? 8 : 2)) break;
+    locals_info[local] |= 1<<LOCAL_ALLOCATED;
+    jinfo->jregs->r_local[local] = pregs[i];
+    jinfo->jregs->mapping[i] = local;
+  }
+#ifdef T2_PRINT_REGUSAGE
+  if (t2_print_regusage) {
+    fprintf(stderr, "Regalloc: %d physical registers allocated as follows\n", npregs);
+    for (j = 0; j < nlocals; j++) {
+      unsigned r = jinfo->jregs->r_local[j];
+      if (r) {
+	unsigned typ = (locals_info[j] >> LOCAL_INT) & 0x1f;
+	fprintf(stderr, "  ARM Reg R%d -> local %d (type = %s)\n", r, j, local_types[LOG2(typ)]);
+      }
+    }
+  }
+#endif
+}
+
+//-------------------------------------------------------------------------------------
+
// LDM/STM addressing-mode field values: Decrement/Increment After/Before.
#define	DA	0
#define	IA	1
#define DB	2
#define IB	3

// Stack-convention aliases for block pushes (Empty/Full, Descending/Ascending).
#define	PUSH_ED	0
#define PUSH_EA	1
#define	PUSH_FD	2
#define	PUSH_FA	3

// ...and for the matching pops.
#define	POP_FA	0
#define	POP_FD	1
#define	POP_EA	2
#define	POP_ED	3

// 32-bit rotate right/left by 'sh'.
// NOTE(review): sh == 0 (or 32) makes the complementary shift count 32,
// which is undefined behaviour in C - callers must pass 1..31.
#define ROR(imm, sh) (((imm) >> (sh)) | ((imm) << (32 - (sh))))
#define ROL(imm, sh) (((imm) << (sh)) | ((imm) >> (32 - (sh))))

// NOTE: this 'abs' shadows the library function and evaluates its argument
// twice - only pass side-effect-free expressions.
#define abs(i) ((i) < 0 ? -(i) : (i))
// The ARM 'U' (offset add/subtract) bit: 1 for a non-negative offset.
#define U(i) ((i) < 0 ? 0 : 1)

// Load/store operation indices into t_ls_ops[] / a_ls_ops[] below.
#define LS_STR		0
#define	LS_STRB		1
#define	LS_STRH		2
#define LS_LDRSB	3
#define	LS_LDR		4
#define LS_LDRB		5
#define	LS_LDRH		6
#define LS_LDRSH	7
#define LS_LDRD		8
#define LS_STRD		9

// Classification helpers over the LS_* indices (loads occupy 3..8;
// (op & 3) picks out the access width shared by the store/load pair).
#define LS_IS_LDR(op)	((op) >= LS_LDRSB)
#define LS_IS_WORD(op)	(((op) & 3) == LS_STR)
#define LS_IS_BYTE(op)	(((op) & 3) == LS_STRB || (op) == LS_LDRSB)
#define LS_IS_HW(op)	(((op) & 3) == LS_STRH || (op) == LS_LDRSH)
// Ops whose Thumb-2 encoding takes a 12-bit immediate offset.
#define LS_IS_IMM12(op)	((op) == LS_STR || (op) == LS_STRB || (op) == LS_LDR || (op) == LS_LDRB)

// Thumb base opcodes per LS_* op: {16-bit register-offset form,
// 32-bit form} pairs, selected via T1_LS_OP()/T2_LS_OP().
static const unsigned t_ls_ops[16] = {
	0x5000,		0xf8400000,
	0x5400,		0xf8000000,
	0x5200,		0xf8200000,
	0x5600,		0xf9100000,
	0x5800,		0xf8500000,
	0x5c00,		0xf8100000,
	0x5a00,		0xf8300000,
	0x5e00,		0xf9300000,
};

// ARM base opcodes per LS_* op: {immediate-offset, register-offset} pairs.
static const unsigned a_ls_ops[20] = {
	0xe4000000,	0xe6000000,	// str
	0xe4400000,	0xe6400000,	// strb
	0xe04000b0,	0xe00000b0,	// strh
	0xe05000d0,	0xe01000d0,	// ldrsb
	0xe4100000,	0xe6100000,	// ldr
	0xe4500000,	0xe6500000,	// ldrb
	0xe05000b0,	0xe01000b0,	// ldrh
	0xe05000f0,	0xe01000f0,	// ldrsh
	0xe04000d0,	0xe00000d0,	// ldrd
	0xe04000f0,	0xe00000f0,	// strd
};

// Select the ARM immediate- or register-offset base opcode for an LS_* op.
#define A_LS_OP_IMM(op)	a_ls_ops[(op)*2]
#define A_LS_OP_REG(op)	a_ls_ops[(op)*2+1]
+
// Data-processing operation indices into n_ops[], t_dop_ops[] and
// a_dop_ops[] below.
#define DP_ADC	0
#define DP_ADD	1
#define DP_AND	2
#define DP_ASR	3
#define DP_BIC	4
#define DP_CMN	5
#define DP_CMP	6
#define DP_EOR	7
#define DP_LSL	8
#define DP_LSR	9
#define DP_MOV	10
#define DP_MVN	11
#define DP_ORN	12
#define DP_ORR	13
#define DP_ROR	14
#define DP_RSB	15
#define DP_SBC	16
#define DP_SUB	17
#define DP_TEQ	18
#define DP_TST	19
#define DP_MUL	20

// For each DP_* op, the op that gives the same result when the immediate
// is replaced by its bitwise complement or negation (used when the
// original immediate cannot be encoded but ~imm or -imm can).
// -1 marks ops with no such complementary form.
static const unsigned n_ops[] = {
	DP_SBC,		// ADC	x, y == SBC x, ~y
	DP_SUB,		// ADD	x, y == SUB x, -y
	DP_BIC,		// AND	x, y == BIC x, ~y
	(unsigned)-1,	// ASR
	DP_AND,		// BIC	x, y == AND x, ~y
	DP_CMP,		// CMN	x, y == CMP x, -y
	DP_CMN,		// CMP	x, y == CMN x, -y
	(unsigned)-1,	// EOR
	(unsigned)-1,	// LSL
	(unsigned)-1,	// LSR
	DP_MVN,		// MOV	x, y == MVN x, ~y
	DP_MOV,		// MVN	x, y == MOV x, ~y
	DP_ORR,		// ORN	x, y == ORR x, ~y
	DP_ORN,		// ORR	x, y == ORN x, ~y
	(unsigned)-1,	// ROR
	(unsigned)-1,	// RSB
	DP_ADC,		// SBC	x, y == ADC x, ~y
	DP_ADD,		// SUB	x, y == ADD x, -y
	(unsigned)-1,	// TEQ
	(unsigned)-1,	// TST
	(unsigned)-1,	// MUL
};

#define N_OP(op)	n_ops[(op)]

// Thumb-2 base opcodes per DP_* op: {immediate form, register form} pairs.
static const unsigned t_dop_ops[] = {
//	Rd, Rm, #N	Rd, Rn, Rm
	0xf1400000,	0xeb400000,	// ADC
	0xf1000000,	0xeb000000,	// ADD
	0xf0000000,	0xea000000,	// AND
	0xea4f0020,	0xfa40f000,	// ASR
	0xf0200000,	0xea200000,	// BIC
	0xf1100f00,	0xeb100f00,	// CMN
	0xf1b00f00,	0xebb00f00,	// CMP
	0xf0800000,	0xea800000,	// EOR
	0xea4f0000,	0xfa00f000,	// LSL
	0xea4f0010,	0xfa20f000,	// LSR
	0xf04f0000,	0xea4f0000,	// MOV
	0xf06f0000,	0xea6f0000,	// MVN
	0xf0600000,	0xea600000,	// ORN
	0xf0400000,	0xea400000,	// ORR
	0xea4f0030,	0xfa6f0000,	// ROR
	0xf1c00000,	0xebc00000,	// RSB
	0xf1600000,	0xeb600000,	// SBC
	0xf1a00000,	0xeba00000,	// SUB
	0xf0900f00,	0xea900f00,	// TEQ
	0xf0100f00,	0xea100f00,	// TST
	(unsigned)-1,	0xfb00f000,	// MUL (no immediate form)
};

#define T_DP_IMM(op)	t_dop_ops[(op)*2]
#define T_DP_REG(op)	t_dop_ops[(op)*2+1]

// ARM base opcodes per DP_* op: {immediate form, register form} pairs.
static const unsigned a_dop_ops[] = {
//	Rd, Rm, #N	Rd, Rn, Rm
	0xe2a00000,	0xe0a00000,	// ADC
	0xe2800000,	0xe0800000,	// ADD
	0xe2000000,	0xe0000000,	// AND
	0xe1a00040,	0xe1a00050,	// ASR
	0xe3c00000,	0xe1c00000,	// BIC
	0xe3700000,	0xe1700000,	// CMN
	0xe3500000,	0xe1500000,	// CMP
	0xe2200000,	0xe0200000,	// EOR
	0xe1a00000,	0xe1a00010,	// LSL
	0xe1a00020,	0xe1a00030,	// LSR
	0xe3a00000,	0xe1a00000,	// MOV
	0xe3e00000,	0xe1e00000,	// MVN
	(unsigned)-1,	(unsigned)-1,	// ORN - only Thumb
	0xe3800000,	0xe1800000,	// ORR
	0xe1a00060,	0xe1a00070,	// ROR
	0xe2600000,	0xe0600000,	// RSB
	0xe2c00000,	0xe0c00000,	// SBC
	0xe2400000,	0xe0400000,	// SUB
	0xe3300000,	0xe1300000,	// TEQ
	0xe3100000,	0xe1100000,	// TST
	(unsigned)-1,	0xe0000090,	// MUL (no immediate form)
};

#define A_DP_IMM(op)	a_dop_ops[(op)*2]
#define A_DP_REG(op)	a_dop_ops[(op)*2+1]

// VFP (floating point) operation indices into t_vop_ops[].
#define VP_ADD	0
#define VP_SUB	1
#define VP_MUL	2
#define VP_DIV	3
#define VP_SQRT 4

// VFP base opcodes (single-precision register forms).
static const unsigned t_vop_ops[] = {
	0xee300a00,			// VADD
	0xee300a40,			// VSUB
	0xee200a00,			// VMUL
	0xee800a00,			// VDIV
	0xeeb10bc0			// VSQRT
};

#define VP_REG(op)	t_vop_ops[op]

// Select the 16-bit (T1) or 32-bit (T2) Thumb base opcode for an LS_* op.
#define T1_LS_OP(op)	t_ls_ops[(op)*2]
#define T2_LS_OP(op)	t_ls_ops[(op)*2+1]

// Shift-type field values; RRX shares encoding value 3 with ROR
// (distinguished by a zero shift amount in the instruction).
#define SHIFT_LSL	0
#define SHIFT_LSR	1
#define SHIFT_ASR	2
#define SHIFT_ROR	3
#define SHIFT_RRX	3
+
+//------------------------------------------------------------------------------------
+
+#define A_BX(src)	(0x012fff10 | (src))
+#define A_MOV(dst, src)	(0x01a00000 | ((dst) << 12) | (src))
+#define A_MOVW_IMM16(r, imm) \
+		(0x03000000 | (((imm) & 0xf000) << (16-12)) | ((imm) & 0xfff) | ((r) << 12))
+#define A_MOVT_IMM16(r, imm) \
+		(0x03400000 | (((imm) & 0xf000) << (16-12)) | ((imm) & 0xfff) | ((r) << 12))
+
+#define A_DOP_IMM(op, dst, src, ror, imm)	((op) | ((dst) << 12) | ((src) << 16) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_DOP_REG(op, dst, lho, rho, st, sh)	((op) | ((dst) << 12) | ((lho) <<16) | (rho) | \
+		((st) << 5) | ((sh) << 7))
+#define A_SHIFT_IMM(op, dst, src, imm)	((op) | ((dst) << 12) | (src) | ((imm) << 7))
+#define A_SHIFT_REG(op, dst, lho, rho)  ((op) | ((dst) << 12) | (lho) | ((rho) << 8))
+#define A_MUL(dst, lho, rho) (0x00000090 | ((dst) << 16) | ((rho) << 8) | (lho))
+
+#define A_MOV_IMM(r, ror, imm)	\
+		(0xe3a00000 | ((r) << 12) | ((ror) << (12-5)) | ((imm) & 0xff))
+#define A_MVN_IMM(r, ror, imm)	\
+		(0xe3e00000 | ((r) << 12) | ((ror) << (12-5)) | ((imm) & 0xff))
+#define A_ORR_IMM(dst, src, ror, imm) \
+		(0xe3800000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_BIC_IMM(dst, src, ror, imm) \
+		(0xe3c00000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_ADD_IMM(dst, src, ror, imm) \
+		(0xe2800000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+#define A_SUB_IMM(dst, src, ror, imm) \
+		(0xe2400000 | ((src) << 16) | ((dst) << 12) | \
+		((ror) << (12-5)) | ((imm) & 0xff))
+
+#define A_STM(base, regset, st, wb)	(0xe8000000 | ((st) << 23) | ((wb) << 21) |	\
+					((base) << 16) | (regset))
+#define A_LDM(base, regset, st, wb)	(0xe8100000 | ((st) << 23) | ((wb) << 21) |     \
+					((base) << 16) | (regset))
+
+#define A_LDR_STR_REG(op, xfer, base, off, sh, pre, wb) ((op) | ((base)<<16) | \
+		((xfer)<<12) | (off) | ((sh) << 7) | (SHIFT_LSL << 5) | (pre<<24) | \
+		(1<<23) | (wb<<21))
+
+// Exclusive access, barrier, sign/zero-extension and multiply encodings (ARM).
+// Note A_LDREXD/A_STREXD encode only the low register of the pair; the high
+// register is implicitly lo+1 (enforced by the callers below).
+#define A_LDREX(dst, base)	  (0xe1900f9f | ((base) << 16) | ((dst) << 12))
+#define A_STREX(dst, src, base)  (0xe1800f90 | ((base) << 16) | ((dst) << 12) | (src))
+#define A_LDREXD(dst, base)	  (0xe1b00f9f | ((base) << 16) | ((dst) << 12))
+#define A_STREXD(dst, src, base) (0xe1a00f90 | ((base) << 16) | ((dst) << 12) | (src))
+
+#define A_DMB(option)			(0xf57ff050 | (option))
+
+#define A_SXTB(dst, src)	(0xe6af0070 | ((dst) << 12) | (src))
+#define A_SXTH(dst, src)	(0xe6bf0070 | ((dst) << 12) | (src))
+#define A_UXTH(dst, src)	(0xe6ff0070 | ((dst) << 12) | (src))
+
+#define A_MLA(res, lho, rho, a) \
+		(0xe0200090 | ((res) << 16) | (lho) | ((rho) << 8) | ((a) << 12))
+#define A_UMULL(res_lo, res_hi, lho, rho) \
+		(0xe0800090 | ((res_lo) << 12) | ((res_hi) << 16) | (lho) | ((rho) << 8))
+
+
+// --- Thumb2 (T32) encoding helpers -----------------------------------------
+// T_* macros build 16-bit (narrow) or 32-bit (wide, emitted via out_16x2)
+// Thumb2 encodings.  TBIT is OR'd into branch targets for interworking.
+#define TBIT 1
+
+#define T_MOV_IMM8(r, imm8)		(0x2000 | ((r)<<8) | (imm8))
+#define T_MOV_BYTELANE(r, typ, b)	(0xf04f0000 | ((typ) << 12) | ((r) << 8) | (b))
+// Rotated immediates split 'ror' across the i:imm3:a fields of the T32
+// modified-immediate encoding.
+#define T_MOV_ROT_IMM(r, ror, imm)	\
+		(0xf04f0000 | (((ror) & 0x10) << (26-4)) | (((ror) & 0xe) << (12-1)) |	\
+		(((ror) & 1) << 7) | ((r) << 8) | ((imm) & 0x7f))
+#define T_MOVW_IMM16(r, imm)		\
+		(0xf2400000 | (((imm) & 0xf000) << (16-12)) | (((imm) & 0x800) << (26-11)) | \
+		(((imm) & 0x700) << (12-8)) | ((imm) & 0xff) | ((r) << 8))
+#define T_MOVT_IMM16(r, imm)		\
+		(0xf2c00000 | (((imm) & 0xf000) << (16-12)) | (((imm) & 0x800) << (26-11)) | \
+		(((imm) & 0x700) << (12-8)) | ((imm) & 0xff) | ((r) << 8))
+#define T_MVN_BYTELANE(r, typ, b)	(0xf06f0000 | ((typ) << 12) | ((r) << 8) | (b))
+#define T_MVN_ROT_IMM(r, ror, imm)	(0xf06f0000 | (((ror) & 0x10) << (26-4)) |	\
+		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((r) << 8) | ((imm) & 0x7f))
+
+#define T_ORR_ROT_IMM(dst, src, ror, imm)	(0xf0400000 | (((ror) & 0x10) << (26-4)) | \
+		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((src) << 16) |	\
+		((dst) << 8) | ((imm) & 0x7f))
+#define T_ORN_ROT_IMM(dst, src, ror, imm)	(0xf0600000 | (((ror) & 0x10) << (26-4)) | \
+		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((src) << 16) |	\
+		((dst) << 8) | ((imm) & 0x7f))
+
+#define T_STR_IMM5(src, base, imm5)	(0x6000 | ((imm5) << 6) | ((base) << 3) | (src))
+#define T_STR_SP_IMM8(src, imm8)	(0x9000 | ((src) << 8) | (imm8))
+#define T_STR_IMM12(src, base, imm12)	(0xf8c00000 | ((src)<<12) | ((base)<<16) | (imm12))
+#define T_STR_IMM8(src, base, imm8, pre, wb)	(0xf8400800 | ((src)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDR_IMM5(dst, base, imm5)	(0x6800 | ((imm5) << 6) | ((base) << 3) | (dst))
+// FIX: T_LDR_SP_IMM8 and T_LDR_IMM8 declared their first parameter as 'src'
+// but expanded the bare token 'dst' — they only worked because every call
+// site happened to pass a variable literally named 'dst'.  Parameter renamed
+// so the macro is actually hygienic.
+#define T_LDR_SP_IMM8(dst, imm8)	(0x9800 | ((dst) << 8) | (imm8))
+#define T_LDR_IMM12(dst, base, imm12)	(0xf8d00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDR_IMM8(dst, base, imm8, pre, wb)	(0xf8500800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+// Byte / halfword / signed / doubleword load-store encodings (T32).
+// The *_IMM8 forms take a signed offset: U(imm8) supplies the add/sub bit
+// and abs(imm8) the magnitude; 'pre'/'wb' select indexing and writeback.
+#define T_STRB_IMM5(src, base, imm5)	(0x7000 | ((imm5) << 6) | ((base) << 3) | (src))
+#define T_STRB_IMM12(src, base, imm12)	(0xf8800000 | ((src)<<12) | ((base)<<16) | (imm12))
+#define T_STRB_IMM8(src, base, imm8, pre, wb)	(0xf8000800 | ((src)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRB_IMM5(dst, base, imm5)	(0x7800 | ((imm5) << 6) | ((base) << 3) | (dst))
+#define T_LDRB_IMM12(dst, base, imm12)	(0xf8900000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRB_IMM8(dst, base, imm8, pre, wb)	(0xf8100800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_STRH_IMM5(dst, base, imm5)	(0x8000 | ((imm5) << 6) | ((base) << 3) | (dst))
+#define T_STRH_IMM12(dst, base, imm12)	(0xf8a00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_STRH_IMM8(dst, base, imm8, pre, wb)	(0xf8200800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRH_IMM5(dst, base, imm5)	(0x8800 | ((imm5) << 6) | ((base) << 3) | (dst))
+#define T_LDRH_IMM12(dst, base, imm12)	(0xf8b00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRH_IMM8(dst, base, imm8, pre, wb)	(0xf8300800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRSH_IMM12(dst, base, imm12)	(0xf9b00000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRSH_IMM8(dst, base, imm8, pre, wb)	(0xf9300800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+#define T_LDRSB_IMM12(dst, base, imm12)	(0xf9900000 | ((dst)<<12) | ((base)<<16) | (imm12))
+#define T_LDRSB_IMM8(dst, base, imm8, pre, wb)	(0xf9100800 | ((dst)<<12) | 		\
+		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
+
+// LDRD/STRD take the offset already scaled (callers pass offset>>2).
+#define T_LDRD_IMM(lo, hi, base, imm8, pre, wb)	(0xe8500000 | ((base)<<16) |		\
+		((lo) << 12) | ((hi)<<8) | ((pre)<<24) | (U(imm8)<<23) | ((wb)<<21) | abs(imm8))
+#define T_STRD_IMM(lo, hi, base, imm8, pre, wb)	(0xe8400000 | ((base)<<16) |		\
+		((lo) << 12) | ((hi)<<8) | ((pre)<<24) | (U(imm8)<<23) | ((wb)<<21) | abs(imm8))
+
+// Exclusive load/store (T32).  'off' is a byte offset, encoded >> 2.
+// FIX: parenthesized the 'off', 'dst2', 'src2' and 'dst' parameters in the
+// expansions — they were used bare, which mis-expands for expression
+// arguments (macro-hygiene defect; behavior unchanged for current callers).
+#define T_LDREX(dst, base, off) (0xe8500f00 | ((base) << 16) | ((dst) << 12) | ((off) >> 2))
+#define T_STREX(dst, src, base, off) (0xe8400000 | ((base) << 16) | \
+		((src) << 12) | ((dst) << 8) | ((off) >> 2))
+
+#define T_LDREXD(dst1, dst2, base) (0xe8d0007f | ((base) << 16) | ((dst1) << 12) | ((dst2) << 8))
+#define T_STREXD(dst, src1, src2, base) (0xe8c00070 | ((base) << 16) | ((src1) << 12) | ((src2) << 8) | (dst))
+
+// Block transfer, add/sub immediate, compare, VFP move and multiply
+// encodings (T32).
+#define T_STM8(base, regset)		(0xc000 | ((base) << 8) | (regset))
+#define T_STM16(base, regset, st, wb)	(0xe8000000 | ((st) << 23) | ((wb) << 21) |	\
+		((base) << 16) | (regset))
+
+#define T_LDM8(base, regset)		(0xc800 | ((base) << 8) | (regset))
+#define	T_LDM16(base, regset, st, wb)	(0xe8100000 | ((st) << 23) | ((wb) << 21) |	\
+		((base) << 16) | (regset))
+// POP may include PC (bit 8 of the encoding); PUSH may include LR.
+#define T_POP(regset)	(0xbc00 | (((regset & (1<<ARM_PC)) >> ARM_PC) << 8) | (regset & 0xff))
+#define T_PUSH(regset)	(0xb400 | (((regset & (1<<ARM_LR)) >> ARM_LR) << 8) | (regset & 0xff))
+
+#define	T1_LDR_STR_REG(op, xfer, base, off) 	((op) | ((off) << 6) | ((base) << 3) | (xfer))
+#define T2_LDR_STR_REG(op, xfer, base, off, sh)	((op) | ((base) << 16) | ((xfer) << 12) | \
+		((sh)<<4) | (off))
+
+#define T_CHKA(size, idx)		(0xca00 | (((size) & 8) << (7-3)) | ((idx) << 3) | ((size) & 7))
+#define T_HBL(handler)			(0xc300 | (handler))
+#define T_MISC_CONTROL(op, option)	(0xf3bf8f00 | ((op)<<4) | option)
+#define T_ENTER_LEAVE(enter)		(T_MISC_CONTROL(enter, 0xf))
+#define T_DMB(option)			(T_MISC_CONTROL(5, option))
+
+// ADD/SUB immediate: T1/T2 are 16-bit forms, T3 modified-immediate,
+// T4 plain 12-bit immediate (ADDW/SUBW).
+#define T1_ADD_IMM(dst, src, imm3)	(0x1c00 | ((imm3) << 6) | ((src) << 3) | (dst))
+#define T2_ADD_IMM(r, imm8)		(0x3000 | ((r) << 8) | (imm8))
+#define T3_ADD_BYTELANE(dst, src, typ, b) (0xf1000000 | ((src) << 16) | ((typ) << 12) | \
+		((dst) << 8) | (b))
+#define T3_ADD_ROT_IMM(dst, src, ror, imm) (0xf1000000 | ((src) << 16) | ((dst) << 8) | \
+		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
+		((imm) & 0x7f))
+#define T4_ADD_IMM(dst, src, imm)	(0xf2000000 | ((src) << 16) | ((dst) << 8) | \
+		(((imm) & 0x800) << (26-11)) | (((imm) & 0x700) << (12-8)) | ((imm) & 0xff))
+
+#define T1_SUB_IMM(dst, src, imm3)	(0x1e00 | ((imm3) << 6) | ((src) << 3) | (dst))
+#define T2_SUB_IMM(r, imm8)		(0x3800 | ((r) << 8) | (imm8))
+#define T3_SUB_BYTELANE(dst, src, typ, b) (0xf1a00000 | ((src) << 16) | ((typ) << 12) | \
+		((dst) << 8) | (b))
+#define T3_SUB_ROT_IMM(dst, src, ror, imm) (0xf1a00000 | ((src) << 16) | ((dst) << 8) | \
+		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
+		((imm) & 0x7f))
+#define T4_SUB_IMM(dst, src, imm)	(0xf2a00000 | ((src) << 16) | ((dst) << 8) | \
+		(((imm) & 0x800) << (26-11)) | (((imm) & 0x700) << (12-8)) | ((imm) & 0xff))
+
+#define T_DOP_BYTELANE(op, dst, src, typ, b)	((op) | ((dst) << 8) | ((src) << 16) | \
+		((typ) << 12) | (b))
+#define T_DOP_ROT_IMM(op, dst, src, ror, imm)	((op) | ((dst) << 8) | ((src) << 16) | \
+		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
+		((imm) & 0x7f))
+#define T_SHIFT_IMM(op, dst, src, imm)	((op) | ((dst) << 8) | (src) | \
+		(((imm) & 3) << 6) | (((imm) & 0x1c) << (12-2)))
+#define T_DOP_REG(op, dst, lho, rho, st, sh)	((op) | ((dst) << 8) | ((lho) << 16) | (rho) | \
+		((st) << 4) | (((sh) & 0x1c) << (12-2)) | (((sh) & 3) << 6))
+// NOTE(review): duplicate of the T3_ADD_BYTELANE definition above (identical
+// expansion, so harmless) — candidate for removal.
+#define T3_ADD_BYTELANE(dst, src, typ, b) (0xf1000000 | ((src) << 16) | ((typ) << 12) | \
+		((dst) << 8) | (b))
+
+#define T_CMP_IMM(src, imm)		(0x2800 | ((src) << 8) | (imm))
+#define T_CMP_REG(lho, rho)		(0x4280 | ((rho) << 3) | (lho))
+
+#define T_NEG(dst, src)		(0x4240 | (dst) | ((src) << 3))
+#define T_MVN(dst, src)		(0x43c0 | (dst) | ((src) << 3))
+#define T_MOV(dst, src)		(0x4600 | (((dst) & 8) << (7-3)) | ((src) << 3) | ((dst) & 7))
+
+// VFP <-> ARM core register moves (single and double precision).
+#define T_VMOVS_TOARM(dst, src)	\
+	(0xee100a10 | ((dst) << 12) | (((src) & 1) << 7) | (((src) & 0x1e)<<(16-1)))
+#define T_VMOVS_TOVFP(dst, src) \
+	(0xee000a10 | ((src) << 12) | (((dst) & 1) << 7) | (((dst) & 0x1e)<<(16-1)))
+
+#define T_VMOVD_TOARM(dst_lo, dst_hi, src) \
+  (0xec500b10 | ((dst_lo) << 12) | ((dst_hi) << 16) | (((src) & 0x10)<<(5-4)) | ((src) & 0x0f))
+#define T_VMOVD_TOVFP(dst, src_lo, src_hi) \
+  (0xec400b10 | ((src_lo) << 12) | ((src_hi) << 16) | (((dst) & 0x10)<<(5-4)) | ((dst) & 0x0f))
+
+// VFP register to VFP register move.
+#define T_VMOVD_VFP_TOVFP(dst, src) (0xeeb00b40 | (((dst) & 0x0f) << 12) | ((src) & 0x0f))
+
+// VFP data-processing: single-precision registers split their low bit into
+// the D/N/M fields; double-precision registers split bit 4 instead.
+#define T_VOP_REG_S(op, dst, lho, rho)	((op) |				\
+		(((dst) & 1) << 22) | (((dst) & 0x1e) << (12-1)) | 	\
+		(((lho) & 1) << 7) | (((lho) & 0x1e) << (16-1))	 |	\
+		(((rho) & 1) << 5) | (((rho) & 0x1e) >> 1))
+#define T_VOP_REG_D(op, dst, lho, rho)	((op) |	(1 << 8) |		\
+		(((dst) & 0x10) << (22-4)) | (((dst) & 0xf) << 12) | 	\
+		(((lho) & 0x10) << (7-4)) | (((lho) & 0xf) << 16)   |	\
+		(((rho) & 0x10) << (5-4)) | ((rho) & 0xf))
+
+// VCMP / VCMPE ('e' selects the invalid-op-raising variant) and VMRS.
+#define T_VCMP_S(lho, rho, e)		(0xeeb40a40 | ((e) << 7) |	\
+		(((lho) & 1) << 22) | (((lho) & 0x1e) << (12-1)) |	\
+		(((rho) & 1) << 5) | (((rho) & 0x1e) >>1))
+#define T_VCMP_D(lho, rho, e)		(0xeeb40b40 | ((e) << 7) |	\
+		(((lho) & 0x10) << (22-4)) | (((lho) & 0x0f) << 12) |	\
+		(((rho) & 0x10) << (5-4)) | ((rho) & 0x0f))
+#define T_VMRS(dst)	(0xeef10a10 | ((dst) << 12))
+
+#define T_MLA(res, lho, rho, a) \
+		(0xfb000000 | ((res) << 8) | ((lho) << 16) | (rho) | ((a) << 12))
+#define T_UMULL(res_lo, res_hi, lho, rho) \
+		(0xfba00000 | ((res_lo) << 12) | ((res_hi) << 8) | ((lho) << 16) | (rho))
+
+#define T_BX(src)		(0x4700 | ((src) << 3))
+#define T_TBH(base, idx)	(0xe8d0f010 | ((base) << 16) | (idx))
+
+// Sign/zero extension: 16-bit forms for low regs, 32-bit (T2_*) otherwise.
+#define T_SXTB(dst, src)	(0xb240 | ((src) << 3) | (dst))
+#define T_SXTH(dst, src)	(0xb200 | ((src) << 3) | (dst))
+#define T2_SXTB(dst, src)	(0xfa4ff080 | ((dst) << 8) | (src))
+#define T2_SXTH(dst, src)	(0xfa0ff080 | ((dst) << 8) | (src))
+#define T_UXTH(dst, src)	(0xb280 | ((src) << 3) | (dst))
+#define T2_UXTH(dst, src)	(0xfa1ff080 | ((dst) << 8) | (src))
+
+// Logical condition codes used throughout the JIT.  These are NOT the ARM
+// condition nibbles — conds[] below translates.  Conditions are arranged in
+// complementary pairs (even = condition, odd = its negation), which it()
+// relies on via (cond & 1).
+#define COND_EQ 0
+#define COND_NE 1
+#define COND_LT	2
+#define COND_GE 3
+#define COND_GT 4
+#define COND_LE 5
+#define COND_CS 6
+#define COND_CC 7
+#define COND_MI 8
+#define COND_PL 9
+#define COND_AL 10
+
+// Map logical COND_* index -> ARM condition-field nibble.
+static unsigned conds[] = {
+	0x0,	// COND_EQ -> EQ
+	0x1,	// COND_NE -> NE
+	0xb,	// COND_LT -> LT
+	0xa,	// COND_GE -> GE
+	0xc,	// COND_GT -> GT
+	0xd,	// COND_LE -> LE
+	0x2,	// COND_CS -> CS/HS
+	0x3,	// COND_CC -> CC/LO
+	0x4,	// COND_MI -> MI
+	0x5,	// COND_PL -> PL
+	0xe,	// COND_AL -> AL
+};
+
+// IT-block masks: first instruction only / then+else / then+else+else.
+#define IT_MASK_T	8
+#define IT_MASK_TE	0x0c
+#define IT_MASK_TEE	0x0e
+
+#define T_IT(cond, mask) (0xbf00 | (conds[cond] << 4) | (mask))
+
+// Emit one 16-bit unit of raw data (unlike out_16, no Thumb2-mode assert,
+// so it is usable for padding/data in either ISA).  Aborts compilation via
+// longjmp when the buffer is full; returns 0 on success.
+int out_16_data(CodeBuf *codebuf, u32 s)
+{
+  unsigned pos = codebuf->idx;
+  if (pos >= codebuf->limit)
+	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
+  codebuf->codebuf[pos] = s;
+  codebuf->idx = pos + 1;
+  return 0;
+}
+
+// Emit one 16-bit Thumb2 instruction halfword.  Only legal when generating
+// Thumb2 code; aborts compilation via longjmp when the buffer is full.
+int out_16(CodeBuf *codebuf, u32 s)
+{
+  JASSERT(Thumb2, "must be Thumb2 in out16");
+  unsigned pos = codebuf->idx;
+  if (pos >= codebuf->limit)
+	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
+  codebuf->codebuf[pos] = s;
+  codebuf->idx = pos + 1;
+  return 0;
+}
+
+// Emit a 32-bit Thumb2 instruction as two halfwords, most-significant first.
+int out_16x2(CodeBuf *codebuf, u32 sx2)
+{
+  out_16(codebuf, sx2 >> 16);
+  return out_16(codebuf, sx2 & 0xffff);
+}
+
+// Emit one 32-bit ARM word (occupies two 16-bit buffer slots).
+// NOTE(review): the cast assumes the current position is 32-bit aligned —
+// the buffer is halfword-granular, so callers must maintain alignment
+// (see out_align); confirm all ARM-mode paths do.
+int out_32(CodeBuf *codebuf, u32 w)
+{
+  if (codebuf->idx + 2 > codebuf->limit)
+	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
+  *(u32 *)&(codebuf->codebuf[codebuf->idx]) = w;
+  codebuf->idx += 2;
+  return 0;
+}
+
+// Emit one ARM instruction with the given condition.  Encodings arrive with
+// a cond nibble of 0xE (always), 0x0, or 0xF (the unconditional-only space,
+// which cannot take a condition).
+// FIX: the cond nibble was OR'd in twice (once via 'w |= ...' and again in
+// the return expression), and the 0xF path relied on 0xF | 0xE == 0xF to
+// stay intact; each path now emits exactly what it means.  Also fixed the
+// "applay" typo in the assert message.
+int out_arm32(CodeBuf *codebuf, u32 w, unsigned cond)
+{
+  if ((w >> 28) == 0x0f) {
+    // Unconditional-only encoding: emit as-is.
+    JASSERT(cond == COND_AL, "cannot apply cond to always inst");
+    return out_32(codebuf, w);
+  }
+  JASSERT((w >> 28) == 0x0e || (w >> 28) == 0, "must be");
+  w &= ~0xf0000000;
+  return out_32(codebuf, w | (conds[cond] << 28));
+}
+
+// Convenience overload: emit an ARM instruction unconditionally (AL).
+int out_arm32(CodeBuf *codebuf, u32 w)
+{
+  return out_arm32(codebuf, w, COND_AL);
+}
+
+// Emit a 32-bit instruction in whichever ISA we are currently generating.
+int out_armthumb32(CodeBuf *codebuf, u32 w)
+{
+  if (!Thumb2)
+    return out_32(codebuf, w);
+  return out_16x2(codebuf, w);
+}
+
+// Current output position as an absolute address (pointer into the buffer,
+// returned as an integer).
+u32 out_pos(CodeBuf *codebuf)
+{
+  return (u32)&(codebuf->codebuf[codebuf->idx]);
+}
+
+// Current output position as a byte offset from the start of the buffer
+// (idx counts 16-bit units, hence * 2).
+u32 out_loc(CodeBuf *codebuf)
+{
+  return codebuf->idx * 2;
+}
+
+// Pad with zeros until the output address is 'align'-aligned ('align' must
+// be a power of two).  A halfword of padding is emitted as data (out_16_data
+// carries no Thumb2 assert), then whole zero words.  Returns the aligned
+// address.
+u32 out_align(CodeBuf *codebuf, unsigned align)
+{
+  while ((out_pos(codebuf) & (align-1)) != 0)
+    ((out_pos(codebuf) & 2) ? out_16_data:out_32)(codebuf, 0);
+  return out_pos(codebuf);
+}
+
+// Pad with zeros until out_pos(codebuf) % align == offset.
+// NOTE(review): this pads via out_16 (which asserts Thumb2) while out_align
+// uses out_16_data — presumably only reached in Thumb2 mode; confirm, or
+// switch to out_16_data for consistency.
+u32 out_align_offset(CodeBuf *codebuf, unsigned align, unsigned offset)
+{
+  while ((out_pos(codebuf) & (align-1)) != offset)
+    ((out_pos(codebuf) & 2) ? out_16:out_32)(codebuf, 0);
+  return out_pos(codebuf);
+}
+
+// Emit a Thumb2 IT (If-Then) instruction for 'cond' with the then/else
+// pattern in 'mask' (IT_MASK_T / IT_MASK_TE / IT_MASK_TEE).  The mask is
+// expressed relative to the even (non-negated) condition; for a negated
+// condition (odd index in conds[]) the encoding requires the then/else bits
+// above the terminator to be flipped.
+int it(CodeBuf *codebuf, unsigned cond, unsigned mask)
+{
+  JASSERT(Thumb2, "Must be");
+  if (cond & 1) {
+    // If this is a negated condition, flip all the bits above the
+    // least significant bit that is 1.  Note that at least one bit is
+    // always 1 in mask
+    switch (mask & (-mask)) {
+    case 8:
+      break;
+    case 4:
+      mask ^= 8;
+      break;
+    case 2:
+      mask ^= 0x0c;
+      break;
+    case 1:
+      mask ^= 0x0e;
+      break;
+    default:
+      // Impossible unless someone specified an incorrect mask
+      longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+    }
+  }
+
+  return out_16(codebuf, T_IT(cond, mask));
+}
+
+// If 'imm' fits a single Thumb2 rotated 8-bit immediate, return the rotation
+// amount to encode; otherwise return -1.  (The value must be coverable by
+// one 8-bit window no lower than bit 8 after rotation.)
+int thumb_single_shift(unsigned imm)
+{
+  if (imm == 0)
+    return -1;
+  unsigned lsl = 0;
+  unsigned v = imm;
+  while ((v & 0x80000000) == 0) {
+    v <<= 1;
+    lsl++;
+  }
+  if (lsl < 24 && (v & 0xff000000) == v)
+    return (int)(lsl + 8);
+  return -1;
+}
+
+// Choose the rotate-right amount to use when encoding 'imm' as an ARM
+// rotated 8-bit immediate (or the first immediate of a MOV+ORR / MVN+BIC
+// chain — see mov_imm).  Picks the window after the longest run of zero
+// bit-pairs so the chain needs the fewest instructions.
+unsigned a_imm_shift(unsigned imm)
+{
+        unsigned shl, maxshl, z, maxz, i;
+
+// ECN: Need to be cafeful with the selection of the shifts here.
+// Consider, for example, the constant 0xf0000ffc
+// This can be done in 2 instructions if we choose the correct start
+//      MOV Rn, #0xf000000c / ORR Rn, Rn, #0x00000ff0
+// However, choose the wrong start and it takes 3 instructions
+//      MOV Rn, #0x000003fc / ORR Rn, Rn, #0x00000c00 / ORR Rn, Rn, #0xf0000000
+// To get this right we find the longest sequence of 00 bits.
+
+// First shift the immediate down so we have a non zero bit in bits 0,1
+	// short circuit simple imm and keep rotate 0 for readability
+        if (!(imm & ~0xff)) return 0;
+        shl = 0;
+        while ((imm & 3) == 0) {
+                imm >>= 2;
+                shl += 2;
+        }
+	// short circuit where it is all in bottom 16 bits - always 1 or 2
+//	if (!(imm & ~0xffff)) return 32 - shl;
+        maxz = 0;
+        maxshl = 0;
+        z = 0;
+        // Scan bit-pairs (ARM rotates are even) for the longest zero run.
+        for (i = 0; i < 32; i += 2) {
+                if ((imm & 3) == 0) {
+                        z++;
+                } else {
+                        if (z > maxz) { maxz = z; maxshl = i; }
+                        z = 0;
+                }
+                imm = ROR(imm, 2);
+        }
+        if (z > maxz) maxshl = 0;
+        return -(shl + maxshl) & 0x1f;
+}
+
+// Return the rotation expressing 'imm' as a single ARM rotated 8-bit
+// immediate, or -1 if no single rotation covers all the set bits.
+int arm_single_shift(unsigned imm)
+{
+    unsigned sh = a_imm_shift(imm);
+    if ((imm & ~ROR(0xff, sh)) != 0)
+        return -1;
+    return (int)sh;
+}
+
+// Classify 'imm' for the Thumb2 byte-lane immediate forms:
+//   0: 0x000000XY   1: 0x00XY00XY   2: 0xXY00XY00   3: 0xXYXYXYXY
+// Returns -1 if no form matches.  The checks run from the form the original
+// code preferred (last assignment won) to the least preferred.
+int thumb_bytelane(u32 imm)
+{
+    unsigned b1 = imm & 0xff;
+    unsigned b2 = (imm >> 8) & 0xff;
+    unsigned b3 = (imm >> 16) & 0xff;
+    unsigned b4 = imm >> 24;
+
+    if (imm < 256)
+        return 0;
+    if (b2 == b4 && b1 == 0 && b3 == 0)
+        return 2;
+    if (b1 == b2 && b1 == b3 && b1 == b4)
+        return 3;
+    if (b1 == b3 && b2 == 0 && b4 == 0)
+        return 1;
+    return -1;
+}
+
+// Load the 32-bit constant 'imm' into register r using the shortest
+// available sequence for the current ISA.
+//   Thumb2:   MOVS imm8 / MOV.W byte-lane / rotated imm / MOVW / MOVW+MOVT.
+//   ARMv6T2+: single rotated MOV or MVN, else MOVW (+MOVT if needed).
+//   Older ARM: optimal MOV+ORR... or MVN+BIC... chain (1-4 instructions).
+// 'cond' is honoured on the ARM paths only; Thumb2 requires COND_AL.
+int mov_imm(CodeBuf *codebuf, Reg r, u32 imm, unsigned cond = COND_AL)
+{
+  int mov_type, rol;
+
+  if (Thumb2) {
+    JASSERT(cond == COND_AL, "only COND_AL in Thumb2");
+    if (r < ARM_R8 && imm < 256)
+      return out_16(codebuf, T_MOV_IMM8(r, imm));
+    mov_type = thumb_bytelane(imm);
+    if (mov_type >= 0) {
+      if (mov_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T_MOV_BYTELANE(r, mov_type, (imm & 0xff)));
+    }
+    mov_type = thumb_bytelane(~imm);
+    if (mov_type >= 0) {
+      imm = ~imm;
+      if (mov_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T_MVN_BYTELANE(r, mov_type, (imm & 0xff)));
+    }
+    rol = thumb_single_shift(imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T_MOV_ROT_IMM(r, rol, ROL(imm, rol)));
+    rol = thumb_single_shift(~imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T_MVN_ROT_IMM(r, rol, ROL(~imm, rol)));
+    if ((imm & ~0xffff) == 0)
+      return out_16x2(codebuf, T_MOVW_IMM16(r, imm & 0xffff));
+    // Low register: try 16-bit MOVS of the low byte + one wide ORR.
+    if (r < ARM_R8) {
+      rol = thumb_single_shift(imm & ~0xff);
+      if (rol >= 0) {
+	out_16(codebuf, T_MOV_IMM8(r, imm & 0xff));
+	return out_16x2(codebuf, T_ORR_ROT_IMM(r, r, rol, ROL(imm & ~0xff, rol)));
+      }
+    }
+    out_16x2(codebuf, T_MOVW_IMM16(r, imm & 0xffff));
+    return out_16x2(codebuf, T_MOVT_IMM16(r, imm >> 16));
+  }
+  if (ARCH_GE_V6T2(CPUInfo)) {
+    // ARMV6T2 or greater, we can use movw/movt
+    int sh;
+    if ((sh = arm_single_shift(imm)) >= 0)
+      return out_arm32(codebuf, A_MOV_IMM(r, sh, ROL(imm, sh)), cond);
+    if ((sh = arm_single_shift(~imm)) >= 0)
+      return out_arm32(codebuf, A_MVN_IMM(r, sh, ROL(~imm, sh)), cond);
+    out_arm32(codebuf, A_MOVW_IMM16(r, imm & 0xffff), cond);
+    if ((imm >> 16) == 0) return 0;
+    return out_arm32(codebuf, A_MOVT_IMM16(r, imm >> 16), cond);
+  }
+  {
+// The following generates an optimal sequence of either
+//   MOV dst, #N followed by up to 3 ORR dst, dst, #N
+//   MVN dst, #N followed by up to 3 BIC dst, dst, #N
+// The following is a profile of how many instructions are required for all
+// integers in the 32 bit range
+// 1 instruction sequence - 6146 times
+// 2 instruction sequence - 5308392 times
+// 3 instruction sequence - 1071714202 times
+// 4 instruction sequence - 3217938556 times
+//
+    unsigned mov_count, mov_shifts[4];
+    unsigned mvn_count, mvn_shifts[4];
+    unsigned im, sh;
+    unsigned mask;
+    unsigned i;
+
+    // Each step consumes at least one 8-bit window, so at most 4 shifts.
+    im = imm;
+    mov_count = 0;
+    do {
+      sh = mov_shifts[mov_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    if (mov_count == 1) {
+      // short circuit the common case of 1 instruction
+      sh = mov_shifts[0];
+      return out_arm32(codebuf, A_MOV_IMM(r, sh, ROL(imm, sh)), cond);
+    }
+    im = ~imm;
+    mvn_count = 0;
+    do {
+      sh = mvn_shifts[mvn_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    if (mov_count <= mvn_count) {
+      sh = mov_shifts[0];
+      out_arm32(codebuf, A_MOV_IMM(r, sh, ROL(imm, sh)), cond);
+      for (i = 1; i < mov_count; i++) {
+	sh = mov_shifts[i];
+	out_arm32(codebuf, A_ORR_IMM(r, r, sh, ROL(imm, sh)), cond);
+      }
+    } else {
+      imm = ~imm;
+      sh = mvn_shifts[0];
+      out_arm32(codebuf, A_MVN_IMM(r, sh, ROL(imm, sh)), cond);
+      for (i = 1; i < mvn_count; i++) {
+	sh = mvn_shifts[i];
+	out_arm32(codebuf, A_BIC_IMM(r, r, sh, ROL(imm,sh)), cond);
+      }
+    }
+    return 0;
+  }
+}
+
+static int add_reg(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho);
+
+// Emit an ARM register-offset load/store (U=1, add offset).  Post-indexed
+// addressing (pre == 0) clears the explicit writeback bit, since writeback
+// is implicit in post-indexing.
+int a_ldst_reg(CodeBuf *codebuf, u32 op, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  if (pre == 0) wb = 0;
+  return out_32(codebuf, A_LS_OP_REG(op) | ((base) << 16) | ((src) << 12) |
+		(offset) | (pre<<24) | (1<<23) | (wb<<21));
+}
+
+// ISA-dispatching register-offset load/store.  Thumb2 has no writeback on
+// the register forms, so 'wb' is emulated with a following ADD of the
+// offset to the base; post-indexing is not supported in Thumb2.
+int load_store_reg(CodeBuf *codebuf, u32 op, Reg xfer, Reg base, Reg offset,
+							  int pre, int wb)
+{
+  if (Thumb2) {
+    JASSERT(pre, "post not supported");
+    if (xfer < ARM_R8 && base < ARM_R8 && offset < ARM_R8)
+      out_16(codebuf, T1_LDR_STR_REG(T1_LS_OP(op), xfer, base, offset));
+    else
+      out_16x2(codebuf, T2_LDR_STR_REG(T2_LS_OP(op), xfer, base, offset, 0));
+    if (wb) add_reg(codebuf, base, base, offset);
+    return 0;
+  }
+  return a_ldst_reg(codebuf, op, xfer, base, offset, pre, wb);
+}
+
+// Thin wrappers over load_store_reg, one per access size/signedness.
+int str_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_STR, src, base, offset, pre, wb);
+}
+
+int ldr_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDR, dst, base, offset, pre, wb);
+}
+
+int strb_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_STRB, src, base, offset, pre, wb);
+}
+
+int ldrb_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRB, dst, base, offset, pre, wb);
+}
+
+int strh_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_STRH, src, base, offset, pre, wb);
+}
+
+int ldrh_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRH, dst, base, offset, pre, wb);
+}
+
+int ldrsh_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRSH, dst, base, offset, pre, wb);
+}
+
+int ldrsb_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, int pre, int wb)
+{
+  return load_store_reg(codebuf, LS_LDRSB, dst, base, offset, pre, wb);
+}
+
+// LDREX dst, [base] (offset 0) in the current ISA.
+int ldrex_imm(CodeBuf *codebuf, Reg dst, Reg base)
+{
+  if (!Thumb2)
+    return out_32(codebuf, A_LDREX(dst, base));
+  return out_16x2(codebuf, T_LDREX(dst, base, 0));
+}
+
+// STREX dst, src, [base] (offset 0); dst receives the success flag.
+int strex_imm(CodeBuf *codebuf, Reg dst, Reg src, Reg base)
+{
+  if (!Thumb2)
+    return out_32(codebuf, A_STREX(dst, src, base));
+  return out_16x2(codebuf, T_STREX(dst, src, base, 0));
+}
+
+// LDREXD: Thumb2 encodes both destination registers; ARM encodes only the
+// even low register and requires the pair to be consecutive.
+int ldrexd(CodeBuf *codebuf, Reg dst0, Reg dst1, Reg base)
+{
+  if (!Thumb2) {
+    JASSERT(dst1 == dst0+1, "must be reg pair for ldrexd");
+    return out_32(codebuf, A_LDREXD(dst0, base));
+  }
+  return out_16x2(codebuf, T_LDREXD(dst0, dst1, base));
+}
+
+// STREXD: same register-pair constraint as ldrexd on the ARM path.
+int strexd(CodeBuf *codebuf, Reg dst, Reg src0, Reg src1, Reg base)
+{
+  if (!Thumb2) {
+    JASSERT(src1 == src0+1, "must be reg pair for strexd");
+    return out_32(codebuf, A_STREXD(dst, src0, base));
+  }
+  return out_16x2(codebuf, T_STREXD(dst, src0, src1, base));
+}
+
+// Emit an ARM immediate-offset load/store.  Negative offsets select the
+// subtract (U=0) form.  Word/byte ops take a 12-bit immediate; the misc
+// forms (halfword, signed, doubleword) take an 8-bit immediate split into
+// two nibbles.  Offsets that don't fit are materialized in ARM_LR (always
+// the last free temp here) and emitted via the register form.
+int a_ldst_imm(CodeBuf *codebuf, u32 op, Reg src, Reg base, int offset, int pre, int wb)
+{
+  unsigned uoff = (unsigned)offset;
+  int is_imm12 = LS_IS_IMM12(op);
+  unsigned uoff_limit = is_imm12 ? (1<<12) : (1<<8);
+
+  if (pre == 0) wb = 0;
+  if (offset < 0) uoff = (unsigned)-offset;
+  if (uoff < uoff_limit) {
+    if (!is_imm12) uoff = (uoff & 0xf) | ((uoff & 0xf0) << 4);
+    return out_32(codebuf, A_LS_OP_IMM(op) | ((base) << 16) | ((src) << 12) |
+	          (pre<<24) | ((offset>=0)<<23) | (wb<<21) | uoff);
+  }
+  // Use ARM_LR as it is always the last tmp
+  mov_imm(codebuf, ARM_LR, offset);
+  return a_ldst_reg(codebuf, op, src, base, ARM_LR, pre, wb);
+}
+
+// Store word with optional pre/post indexing and writeback; (!pre && !wb)
+// is normalized to a plain [base, #0] store.  Large offsets go through
+// ARM_IP, which therefore must not be src or base.
+// FIX: the ARM path fell off the end of this non-void function (missing
+// return — undefined behavior for callers that use the result).
+int str_imm_wb(CodeBuf *codebuf, Reg src, Reg base, int offset, int pre, int wb)
+{
+  unsigned uoff;
+
+  if (!pre && !wb) pre = 1, offset = 0;
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (pre && !wb && offset >= 0) {
+      if (base < ARM_R8 && src < ARM_R8 && uoff < 128 && (uoff & 3) == 0)
+	return out_16(codebuf, T_STR_IMM5(src, base, uoff>>2));
+      if (base == ARM_SP && src < ARM_R8 && uoff < 1024 && (uoff & 3) == 0)
+	return out_16(codebuf, T_STR_SP_IMM8(src, uoff>>2));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_STR_IMM12(src, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_STR_IMM8(src, base, offset, pre, wb));
+    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in str_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return str_reg(codebuf, src, base, ARM_IP, pre, wb);
+  }
+  return a_ldst_imm(codebuf, LS_STR, src, base, offset, pre, wb);
+}
+
+// Store word to [base, #offset] (pre-indexed, no writeback).
+// FIX: result of str_imm_wb was discarded and the non-void function fell
+// off the end — now forwarded.
+int str_imm(CodeBuf *codebuf, Reg src, Reg base, int offset)
+{
+  return str_imm_wb(codebuf, src, base, offset, 1, 0);
+}
+
+// Load word with optional pre/post indexing and writeback; (!pre && !wb)
+// is normalized to a plain [base, #0] load.
+// FIXES: (1) the SP-relative check used bitwise '&' ("uoff < 1024 &
+// (uoff & 3) == 0") instead of '&&' — same value here by precedence, but
+// a typo waiting to bite; (2) the ARM path fell off the end of this
+// non-void function (missing return).
+int ldr_imm_wb(CodeBuf *codebuf, Reg dst, Reg base, int offset, int pre, int wb)
+{
+  unsigned uoff;
+
+  if (!pre && !wb) pre = 1, offset = 0;
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (pre && !wb && offset >= 0) {
+      if (base < ARM_R8 && dst < ARM_R8 && uoff < 128 && (uoff & 3) == 0)
+	return out_16(codebuf, T_LDR_IMM5(dst, base, uoff>>2));
+      if (base == ARM_SP && dst < ARM_R8 && uoff < 1024 && (uoff & 3) == 0)
+	return out_16(codebuf, T_LDR_SP_IMM8(dst, uoff>>2));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDR_IMM12(dst, base, uoff));
+    } else {
+      if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDR_IMM8(dst, base, offset, pre, wb));
+    }
+    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldr_reg(codebuf, dst, base, ARM_IP, pre, wb);
+  }
+  return a_ldst_imm(codebuf, LS_LDR, dst, base, offset, pre, wb);
+}
+
+// Load word from [base, #offset] (pre-indexed, no writeback).
+int ldr_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  return ldr_imm_wb(codebuf, dst, base, offset, 1, 0);
+}
+
+// Store byte to [base, #offset].
+// FIX: the ARM path fell off the end of this non-void function (missing
+// return); also corrected the copy-pasted assert message ("str_imm").
+int strb_imm(CodeBuf *codebuf, Reg src, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && src < ARM_R8 && uoff < 32)
+	return out_16(codebuf, T_STRB_IMM5(src, base, uoff));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_STRB_IMM12(src, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_STRB_IMM8(src, base, offset, 1, 0));
+    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in strb_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return strb_reg(codebuf, src, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_STRB, src, base, offset, 1, 0);
+}
+
+// Load unsigned byte from [base, #offset].
+// FIX: missing return on the ARM path; assert message made specific.
+int ldrb_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && dst < ARM_R8 && uoff < 32)
+	return out_16(codebuf, T_LDRB_IMM5(dst, base, uoff));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRB_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRB_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldrb_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrb_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRB, dst, base, offset, 1, 0);
+}
+
+// Store halfword to [base, #offset].
+// FIX: missing return on the ARM path; assert message made specific.
+int strh_imm(CodeBuf *codebuf, Reg src, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && src < ARM_R8 && uoff < 64 && (uoff & 1) == 0)
+	return out_16(codebuf, T_STRH_IMM5(src, base, uoff>>1));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_STRH_IMM12(src, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_STRH_IMM8(src, base, offset, 1, 0));
+    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in strh_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return strh_reg(codebuf, src, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_STRH, src, base, offset, 1, 0);
+}
+
+// Load unsigned halfword from [base, #offset].
+// FIX: missing return on the ARM path; assert message made specific.
+int ldrh_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (base < ARM_R8 && dst < ARM_R8 && uoff < 64 && (uoff & 1) == 0)
+	return out_16(codebuf, T_LDRH_IMM5(dst, base, uoff>>1));
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRH_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRH_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldrh_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrh_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRH, dst, base, offset, 1, 0);
+}
+
+// Load signed halfword from [base, #offset] (no 16-bit immediate form).
+// FIX: missing return on the ARM path; assert message made specific.
+int ldrsh_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRSH_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRSH_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldrsh_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrsh_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRSH, dst, base, offset, 1, 0);
+}
+
+// Load signed byte from [base, #offset] (no 16-bit immediate form).
+// FIX: missing return on the ARM path; assert message made specific.
+int ldrsb_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset >= 0) {
+      if (uoff < (1 << 12))
+	return out_16x2(codebuf, T_LDRSB_IMM12(dst, base, uoff));
+    } else if (offset < 256 && offset > -256)
+	return out_16x2(codebuf, T_LDRSB_IMM8(dst, base, offset, 1, 0));
+    JASSERT(base != ARM_IP, "base == IP in ldrsb_imm");
+    mov_imm(codebuf, ARM_IP, offset);
+    return ldrsb_reg(codebuf, dst, base, ARM_IP, 1, 0);
+  }
+  return a_ldst_imm(codebuf, LS_LDRSB, dst, base, offset, 1, 0);
+}
+
+int add_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm);
+
+// Register-to-register move, optionally conditional.  A move to PC is
+// emitted as BX to preserve ARM/Thumb interworking.  In Thumb2 a non-AL
+// condition is realized with a preceding one-instruction IT block.
+int mov_reg(CodeBuf *codebuf, u32 dst, u32 src, unsigned cond = COND_AL)
+{
+  if (dst == src) return 0;
+  if (Thumb2) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    if (dst == ARM_PC) return out_16(codebuf, T_BX(src));
+    return out_16(codebuf, T_MOV(dst, src));
+  }
+  if (dst == ARM_PC) return out_arm32(codebuf, A_BX(src), cond);
+  return out_arm32(codebuf, A_MOV(dst, src), cond);
+}
+
+// Emit STM / PUSH, preferring the 16-bit Thumb forms where possible.
+// A single-register set degrades to a plain STR with writeback; the wide
+// T32 STM only supports the PUSH_EA/PUSH_FD addressing modes.
+int stm(CodeBuf *codebuf, u32 regset, u32 base, u32 st, u32 wb)
+{
+  JASSERT(regset != 0, "regset != 0 in stm");
+  if (Thumb2) {
+    if (base < ARM_R8 && (regset & ~0xff) == 0 && st == IA && wb)
+      return out_16(codebuf, T_STM8(base, regset));
+    if (base == ARM_SP) {
+      if ((regset & ~0x40ff) == 0 && st == DB && wb)
+	return out_16(codebuf, T_PUSH(regset));
+    }
+    // Single register: (regset & -regset) isolates the lowest set bit.
+    if ((regset & -regset) == regset)
+      return str_imm_wb(codebuf, LOG2(regset), base, (st & 1) ? 4 : -4, (st & 2) >> 1, wb);
+    JASSERT(st == PUSH_EA || st == PUSH_FD, "only PUSH_EA or PUSH_FD available on Thumb");
+    return out_16x2(codebuf, T_STM16(base, regset, st, wb));
+  }
+  return out_32(codebuf, A_STM(base, regset, st, wb));
+}
+
+// Emit LDM / POP, preferring the 16-bit Thumb forms where possible.
+// A single-register set degrades to a plain LDR with writeback; the wide
+// T32 LDM only supports the POP_EA/POP_FD addressing modes.
+// FIX: assert message said "in stm" (copy/paste from stm above).
+int ldm(CodeBuf *codebuf, u32 regset, u32 base, u32 st, u32 wb)
+{
+  JASSERT(regset != 0, "regset != 0 in ldm");
+  if (Thumb2) {
+    if (base < ARM_R8 && (regset & ~0xff) == 0 && st == IA && wb)
+      return out_16(codebuf, T_LDM8(base, regset));
+    if (base == ARM_SP) {
+      if ((regset & ~0x80ff) == 0 && st == IA && wb)
+	return out_16(codebuf, T_POP(regset));
+    }
+    if ((regset & -regset) == regset)
+      return ldr_imm_wb(codebuf, LOG2(regset), base, (st & 1) ? 4 : -4, (st & 2) >> 1, wb);
+    JASSERT(st == POP_EA || st == POP_FD, "only POP_EA or POP_FD available on Thumb");
+    return out_16x2(codebuf, T_LDM16(base, regset, st, wb));
+  }
+  return out_32(codebuf, A_LDM(base, regset, st, wb));
+}
+
+// Use this macro before calling ldrd_imm to ensure the regs are in the right order
+// for an ldm when compiling for ARM. If the registers are in the wrong order it does
+// a SWAP (note: this does not actually swap the regs, just renames them)
+// (No-op in Thumb2, which allows an arbitrary register pair in LDRD.)
+#define LDRD_PRE(jstack, lo, hi) do { \
+		if (lo > hi && !Thumb2) { \
+		  Reg tmp = lo; \
+		  lo = hi; \
+		  hi = tmp; \
+		  SWAP(jstack); \
+		} \
+              } while (0)
+
+// Load a 64-bit value into (dst_lo, dst_hi) from [base, #offset].
+// Thumb2 uses LDRD directly (8-bit word-scaled offset, or via ARM_IP for
+// larger offsets).  ARM LDRD requires an even, consecutive pair; otherwise
+// we fall back to LDM — see the comments below.
+int ldrd_imm(CodeBuf *codebuf, Reg dst_lo, Reg dst_hi, Reg base, int offset)
+{
+  unsigned uoff;
+
+  uoff = (unsigned)offset;
+  if (Thumb2) {
+    if (offset < 256 * 4 && offset > -256 * 4 && (offset & 3) == 0)
+      return out_16x2(codebuf, T_LDRD_IMM(dst_lo, dst_hi, base, offset>>2, 1, 0));
+    add_imm(codebuf, ARM_IP, base, offset);
+    return out_16x2(codebuf, T_LDRD_IMM(dst_lo, dst_hi, ARM_IP, 0, 1, 0));
+  }
+  if (dst_hi == dst_lo + 1 && !(dst_lo & 1))
+    return a_ldst_imm(codebuf, LS_LDRD, dst_lo, base, offset, 1, 0);
+
+  // The ARM instruction set only allows for a single register
+  // in the ldrd instruction, the high register is assumed to
+  // be the low register + 1, even though the Thumb instruction
+  // set allows a pair of registers to be specified.
+  // In addition the low register must be an even register and
+  // it gives an exception if this is not the case even though
+  // on identically the same processor it can handle an odd
+  // register in Thumb.
+  // So we use LDM instead. Note: We must use LDM rather than
+  // 2 x LDR because it is required to be atomic (on non MP core)
+
+  // LDM requires that the regs are in order and the caller must
+  // use LDRD_PRE to ensure this
+  JASSERT(dst_lo < dst_hi, "regs must be in order for ldm");
+
+  // Note: We only do LDMIA (offset=0) and LDMIB (offset=4)
+  if (offset != 0 && offset != 4) {
+    add_imm(codebuf, ARM_IP, base, offset);
+    base = ARM_IP;
+    offset = 0;
+  }
+  return ldm(codebuf, (1<<dst_lo)|(1<<dst_hi), base, offset ? IB:IA, 0);
+}
+
+// Store the 64-bit pair src_lo/src_hi to [base, #offset].
+// Thumb-2 uses STRD directly (8-bit word-scaled offset; otherwise the
+// address is built in IP). ARM falls back to STM when the pair does not
+// satisfy STRD's even/consecutive register constraint.
+// (Fix: removed the local 'uoff' which was computed but never used.)
+int strd_imm(CodeBuf *codebuf, Reg src_lo, Reg src_hi, Reg base, int offset)
+{
+  if (Thumb2) {
+    if (offset < 256 * 4 && offset > -256 * 4 && (offset & 3) == 0)
+      return out_16x2(codebuf, T_STRD_IMM(src_lo, src_hi, base, offset>>2, 1, 0));
+    add_imm(codebuf, ARM_IP, base, offset);
+    return out_16x2(codebuf, T_STRD_IMM(src_lo, src_hi, ARM_IP, 0, 1, 0));
+  }
+  // See comments above in ldrd_imm
+  if (src_hi == src_lo +1 && !(src_lo & 1))
+    return a_ldst_imm(codebuf, LS_STRD, src_lo, base, offset, 1, 0);
+
+  // If the registers are ooo we cannot simply rename tham like we did for
+  // ldm because the values have already been loaded. So use tmp to ensure
+  // the registers are in order.
+  // Copy the hi value into LR so that LR (highest-numbered usable reg)
+  // serves as the new hi register; the old hi register is then free to
+  // act as the scratch register for any address computation below.
+  Reg tmp = ARM_LR;
+  if (src_lo > src_hi) {
+    mov_reg(codebuf, tmp, src_hi);
+    tmp = src_hi;
+    src_hi = ARM_LR;
+  }
+  JASSERT(src_lo < src_hi, "regs must be in order for stm");
+  if (offset != 0 && offset != 4) {
+    add_imm(codebuf, tmp, base, offset);
+    base = tmp;
+    offset = 0;
+  }
+  return stm(codebuf, (1<<src_lo)|(1<<src_hi), base, offset ? IB:IA, 0);
+}
+
+// Emit a register-register data-processing op: dst = lho <op> (rho <shift>).
+// The S (set-flags) bit is set for every op except MUL and any op writing
+// PC. In Thumb-2 a non-AL condition is realized with a preceding IT block.
+// NOTE(review): the ARM-mode MUL and shift paths do not forward 'cond' to
+// out_arm32 in the same way — the shift path omits it entirely, so a
+// conditional shift would apparently be emitted unconditionally in ARM
+// mode; confirm callers never request that combination.
+int dop_reg(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho, u32 sh_typ, u32 shift, unsigned cond = COND_AL)
+{
+  unsigned s = 0;
+  if (op != DP_MUL && dst != ARM_PC) s = 1 << 20;
+  if (Thumb2) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    return out_16x2(codebuf, T_DOP_REG(T_DP_REG(op)|s, dst, lho, rho, sh_typ, shift));
+  }
+  if (op == DP_MUL) return out_arm32(codebuf, A_MUL(dst, lho, rho), cond);
+  if (op == DP_LSL || op == DP_LSR || op == DP_ASR)
+    return out_arm32(codebuf, A_SHIFT_REG(A_DP_REG(op)|s, dst, lho, rho));
+  return out_arm32(codebuf, A_DOP_REG(A_DP_REG(op)|s, dst, lho, rho, sh_typ, shift), cond);
+}
+
+// Sign-extend byte: dst = (i8)src. Uses the 16-bit Thumb encoding when
+// both registers are low (r0-r7), otherwise the 32-bit Thumb-2 or ARM form.
+int sxtb(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8)
+      return out_16(codebuf, T_SXTB(dst, src));
+    return out_16x2(codebuf, T2_SXTB(dst, src));
+  }
+  return out_32(codebuf, A_SXTB(dst, src));
+}
+
+// Sign-extend halfword: dst = (i16)src. Encoding selection as in sxtb.
+int sxth(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8)
+      return out_16(codebuf, T_SXTH(dst, src));
+    return out_16x2(codebuf, T2_SXTH(dst, src));
+  }
+  return out_32(codebuf, A_SXTH(dst, src));
+}
+
+// Zero-extend halfword: dst = (u16)src. Encoding selection as in sxtb.
+int uxth(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8)
+      return out_16(codebuf, T_UXTH(dst, src));
+    return out_16x2(codebuf, T2_UXTH(dst, src));
+  }
+  return out_32(codebuf, A_UXTH(dst, src));
+}
+
+// Emit a 16-bit no-op (Thumb MOV r0, r0).
+int nop_16(CodeBuf *codebuf)
+{
+  return out_16(codebuf, T_MOV(ARM_R0, ARM_R0));
+}
+
+// Emit a 32-bit no-op as MOV r8, r8 via mov_reg (a high register forces
+// the wide encoding in Thumb-2).
+int nop_32(CodeBuf *codebuf)
+{
+  return mov_reg(codebuf, ARM_R8, ARM_R8);
+}
+
+// Bitwise NOT: dst = ~src. 16-bit Thumb form when both regs are low.
+int mvn_reg(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  if (Thumb2 && dst < ARM_R8 && src < ARM_R8)
+    return out_16(codebuf, T_MVN(dst, src));
+  return dop_reg(codebuf, DP_MVN, dst, 0, src, SHIFT_LSL, 0);
+}
+
+// VFP transfer and arithmetic wrappers. These encodings are identical in
+// ARM and Thumb-2 apart from halfword ordering, which out_armthumb32
+// handles, so no Thumb2 test is needed here.
+
+// Move a 32-bit value from an ARM core register into a VFP single reg.
+int vmov_reg_s_toVFP(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVS_TOVFP(dst, src));
+}
+
+// Move a VFP single reg into an ARM core register.
+int vmov_reg_s_toARM(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVS_TOARM(dst, src));
+}
+
+// Move an ARM core register pair into a VFP double reg.
+int vmov_reg_d_toVFP(CodeBuf *codebuf, u32 dst, u32 src_lo, u32 src_hi)
+{
+  return out_armthumb32(codebuf, T_VMOVD_TOVFP(dst, src_lo, src_hi));
+}
+
+// Copy one VFP double reg to another.
+int vmov_reg_d_VFP_to_VFP(CodeBuf *codebuf, u32 dst, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVD_VFP_TOVFP(dst, src));
+}
+
+// Move a VFP double reg into an ARM core register pair.
+int vmov_reg_d_toARM(CodeBuf *codebuf, u32 dst_lo, u32 dst_hi, u32 src)
+{
+  return out_armthumb32(codebuf, T_VMOVD_TOARM(dst_lo, dst_hi, src));
+}
+
+// Single-precision VFP arithmetic: dst = lho <op> rho.
+int vop_reg_s(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho)
+{
+  return out_armthumb32(codebuf, T_VOP_REG_S(VP_REG(op), dst, lho, rho));
+}
+
+// Double-precision VFP arithmetic: dst = lho <op> rho.
+int vop_reg_d(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho)
+{
+  return out_armthumb32(codebuf, T_VOP_REG_D(VP_REG(op), dst, lho, rho));
+}
+
+// Single-precision compare; 'e' selects the exception-raising (VCMPE) form.
+int vcmp_reg_s(CodeBuf *codebuf, u32 lho, u32 rho, unsigned e)
+{
+  return out_armthumb32(codebuf, T_VCMP_S(lho, rho, e));
+}
+
+// Double-precision compare; 'e' selects the exception-raising (VCMPE) form.
+int vcmp_reg_d(CodeBuf *codebuf, u32 lho, u32 rho, unsigned e)
+{
+  return out_armthumb32(codebuf, T_VCMP_D(lho, rho, e));
+}
+
+// Read the VFP status register (FPSCR) into an ARM core register.
+int vmrs(CodeBuf *codebuf, u32 dst)
+{
+  return out_armthumb32(codebuf, T_VMRS(dst));
+}
+
+// dst = lho + rho (no shift).
+int add_reg(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho)
+{
+  // ECN: FIXMME: Thumb has a 16 bit ADD dst, lho, rho
+  return dop_reg(codebuf, DP_ADD, dst, lho, rho, SHIFT_LSL, 0);
+}
+
+// Compare lho with rho (sets flags only). A non-AL 'cond' makes the
+// compare itself conditional (via IT in Thumb-2, condition field in ARM).
+int cmp_reg(CodeBuf *codebuf, Reg lho, Reg rho, unsigned cond = COND_AL)
+{
+  if (Thumb2 && lho < ARM_R8 && rho < ARM_R8) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    return out_16(codebuf, T_CMP_REG(lho, rho));
+  }
+  return dop_reg(codebuf, DP_CMP, 0x0f, lho, rho, SHIFT_LSL, 0, cond);
+}
+
+// dst = lho + (rho shifted by sh_typ/shift).
+int add_reg_shift(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho, u2 sh_typ, u32 shift)
+{
+  return dop_reg(codebuf, DP_ADD, dst, lho, rho, sh_typ, shift);
+}
+
+// dst = src + imm, choosing the shortest available encoding.
+// Thumb-2 order of preference: 16-bit 3-reg ADD/SUB (imm < 8), 16-bit
+// 2-reg ADD/SUB (same reg, imm < 256), 32-bit "bytelane" immediate,
+// 32-bit single-rotation immediate, 32-bit 12-bit ADDW/SUBW, and finally
+// a constant load into IP plus a register ADD. Each form is also tried
+// with the negated immediate (emitting SUB instead).
+// ARM mode decomposes the immediate into a minimal sequence of 8-bit
+// rotated chunks, using whichever of ADD-chains or SUB-chains is shorter.
+int add_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  int imm_type, rol;
+
+  if (imm == 0) return mov_reg(codebuf, dst, src);
+  if (Thumb2) {
+    if (dst < ARM_R8 && src < ARM_R8) {
+      if (imm < 8)
+	return out_16(codebuf, T1_ADD_IMM(dst, src, imm));
+      // imm is unsigned, so -imm < 8 means imm is in [-7, -1].
+      if (-imm < 8)
+	return out_16(codebuf, T1_SUB_IMM(dst, src, -imm));
+      if (src == dst) {
+	if (imm < 256)
+	  return out_16(codebuf, T2_ADD_IMM(src, imm));
+	if (-imm < 256)
+	  return out_16(codebuf, T2_SUB_IMM(src, -imm));
+      }
+    }
+    imm_type = thumb_bytelane(imm);
+    if (imm_type >= 0) {
+      if (imm_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T3_ADD_BYTELANE(dst, src, imm_type, (imm & 0xff)));
+    }
+    imm_type = thumb_bytelane(-imm);
+    if (imm_type >= 0) {
+      imm = -imm;
+      if (imm_type == 2) imm >>= 8;
+      return out_16x2(codebuf, T3_SUB_BYTELANE(dst, src, imm_type, (imm & 0xff)));
+    }
+    rol = thumb_single_shift(imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T3_ADD_ROT_IMM(dst, src, rol, ROL(imm, rol)));
+    rol = thumb_single_shift(-imm);
+    if (rol >= 0)
+      return out_16x2(codebuf, T3_SUB_ROT_IMM(dst, src, rol, ROL(-imm, rol)));
+    if (imm < (1 << 12))
+      return out_16x2(codebuf, T4_ADD_IMM(dst, src, imm));
+    if (-imm < (1 << 12))
+      return out_16x2(codebuf, T4_SUB_IMM(dst, src, -imm));
+    mov_imm(codebuf, ARM_IP, imm);
+    return add_reg(codebuf, dst, src, ARM_IP);
+  }
+  {
+    unsigned add_count, add_shifts[4];
+    unsigned sub_count, sub_shifts[4];
+    unsigned im, sh;
+    unsigned mask;
+    unsigned i;
+
+    // Split imm into 8-bit rotated chunks for a chain of ADDs.
+    im = imm;
+    add_count = 0;
+    do {
+      sh = add_shifts[add_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    if (add_count == 1) {
+      // short circuit the common case of 1 instruction
+      sh = add_shifts[0];
+      return out_32(codebuf, A_ADD_IMM(dst, src, sh, ROL(imm, sh)));
+    }
+    // Split -imm the same way for a chain of SUBs.
+    im = -imm;
+    sub_count = 0;
+    do {
+      sh = sub_shifts[sub_count++] = a_imm_shift(im);
+      mask = ROR(0xff, sh);
+      im &= ~mask;
+    } while (im);
+    // Emit whichever decomposition needs fewer instructions.
+    if (add_count <= sub_count) {
+      for (i = 0; i < add_count; i++) {
+	sh = add_shifts[i];
+	out_32(codebuf, A_ADD_IMM(dst, src, sh, ROL(imm, sh)));
+	src = dst;
+      }
+    } else {
+      imm = -imm;
+      for (i = 0; i < sub_count; i++) {
+	sh = sub_shifts[i];
+	out_32(codebuf, A_SUB_IMM(dst, src, sh, ROL(imm,sh)));
+	src = dst;
+      }
+    }
+    return 0;
+  }
+}
+
+// dst = src - imm, implemented as an add of the (unsigned) negation.
+int sub_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return add_imm(codebuf, dst, src, -imm);
+}
+
+// ECN: This is suboptimal. Need to rewrite to have 3 states
+// (set flags, preserve flags, dont care). Then rewrite
+// mov ip, #N; op dst, src, ip as op dst, src, #N; op dst, dst, #N ...
+//
+// Emit a data-processing op with an immediate operand: dst = src <op> imm.
+// 's' is the set-flags bit (1<<20) or 0 to preserve flags. Compare/test
+// ops get dst = 0x0f (no destination); MOV/MVN get src = 0x0f.
+// Tries the bytelane and single-rotation immediate encodings with imm,
+// then with the complementary op and complemented/negated immediate
+// (N_OP: e.g. AND<->BIC, ADD<->SUB), finally falls back to loading the
+// constant into IP and using the register form.
+int dop_imm_s(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm, unsigned s)
+{
+    int imm_type, rol;
+    unsigned n_op, n_imm;
+
+    JASSERT(op == DP_ADC || op == DP_ADD || op == DP_AND || op == DP_BIC || op == DP_CMN ||
+		op == DP_CMP || op == DP_EOR || op == DP_MOV || op == DP_MVN ||
+		op == DP_ORN || op == DP_ORR || op == DP_RSB || op == DP_SBC ||
+		op == DP_SUB || op == DP_TEQ || op == DP_TST, "bad op");
+    if (op == DP_CMP || op == DP_CMN || op == DP_TEQ || op == DP_TST) dst = 0x0f;
+    if (op == DP_MOV || op == DP_MVN) src = 0x0f;
+    if (Thumb2) {
+      imm_type = thumb_bytelane(imm);
+      if (imm_type >= 0) {
+	if (imm_type == 2) imm >>= 8;
+	return out_16x2(codebuf, T_DOP_BYTELANE(T_DP_IMM(op)|s, dst, src, imm_type, (imm & 0xff)));
+      }
+      rol = thumb_single_shift(imm);
+      if (rol >= 0)
+	return out_16x2(codebuf, T_DOP_ROT_IMM(T_DP_IMM(op)|s, dst, src, rol, ROL(imm, rol)));
+      n_op = N_OP(op);
+      if (n_op != (unsigned)-1) {
+	// Additive ops need the arithmetic negation; logical ops the
+	// bitwise complement.
+	n_imm = ~imm;
+	if (op == DP_ADD || op == DP_SUB || op == DP_CMP || op == DP_CMN) n_imm = -imm;
+	imm_type = thumb_bytelane(n_imm);
+	if (imm_type >= 0) {
+	  if (imm_type == 2) n_imm >>= 8;
+	  return out_16x2(codebuf, T_DOP_BYTELANE(T_DP_IMM(n_op)|s, dst, src, imm_type, (n_imm & 0xff)));
+	}
+	rol = thumb_single_shift(n_imm);
+	if (rol >= 0)
+	  return out_16x2(codebuf, T_DOP_ROT_IMM(T_DP_IMM(n_op)|s, dst, src, rol, ROL(n_imm, rol)));
+      }
+      mov_imm(codebuf, ARM_IP, imm);
+      return out_16x2(codebuf, T_DOP_REG(T_DP_REG(op)|s, dst, src, ARM_IP, SHIFT_LSL, 0));
+    }
+    if (dst == 0x0f) dst = 0;
+    if (src == 0x0f) src = 0;
+    if (op == DP_ORN) op = DP_ORR, imm = ~imm; // no ORN in arm
+    rol = arm_single_shift(imm);
+    if (rol >= 0)
+      return out_32(codebuf, A_DOP_IMM(A_DP_IMM(op)|s, dst, src, rol, ROL(imm, rol)));
+    n_op = N_OP(op);
+    if (n_op != (unsigned)-1 && n_op != DP_ORN) {
+      n_imm = ~imm;
+      if (op == DP_ADD || op == DP_SUB || op == DP_CMP || op == DP_CMN) n_imm = -imm;
+      rol = arm_single_shift(n_imm);
+      if (rol >= 0)
+        return out_32(codebuf, A_DOP_IMM(A_DP_IMM(n_op)|s, dst, src, rol, ROL(n_imm, rol)));
+    }
+    mov_imm(codebuf, ARM_IP, imm);
+    return out_32(codebuf, A_DOP_REG(A_DP_REG(op)|s, dst, src, ARM_IP, SHIFT_LSL, 0));
+}
+
+// Flag-setting variant of dop_imm_s.
+int dop_imm(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
+{
+    return dop_imm_s(codebuf, op, dst, src, imm, 1<<20);
+}
+
+// Flag-preserving variant of dop_imm_s.
+int dop_imm_preserve(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
+{
+    return dop_imm_s(codebuf, op, dst, src, imm, 0);
+}
+
+// dst = src shifted by imm (op selects LSL/LSR/ASR etc.). The shift
+// amount is taken mod 32; a zero shift degenerates to a plain move.
+int shift_imm(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
+{
+    imm &= 31;
+    if (imm == 0)
+      return mov_reg(codebuf, dst, src);
+    if (Thumb2)
+      return out_16x2(codebuf, T_SHIFT_IMM(T_DP_IMM(op), dst, src, imm));
+    return out_32(codebuf, A_SHIFT_IMM(A_DP_IMM(op), dst, src, imm));
+}
+
+// dst = imm - src. The imm == 0 case (negation) has a 16-bit Thumb form.
+int rsb_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  if (Thumb2 && dst < ARM_R8 && src < ARM_R8 && imm == 0)
+    return out_16(codebuf, T_NEG(dst, src));
+  return dop_imm(codebuf, DP_RSB, dst, src, imm);
+}
+
+// Convenience wrappers around dop_imm / shift_imm for the common
+// immediate-operand instructions. All set the condition flags.
+
+// dst = src + imm + carry.
+int adc_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_ADC, dst, src, imm);
+}
+
+// dst = src >> imm (arithmetic).
+int asr_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return shift_imm(codebuf, DP_ASR, dst, src, imm);
+}
+
+// dst = src << imm.
+int lsl_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return shift_imm(codebuf, DP_LSL, dst, src, imm);
+}
+
+// dst = src ^ imm.
+int eor_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_EOR, dst, src, imm);
+}
+
+// dst = src & imm.
+int and_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_AND, dst, src, imm);
+}
+
+// dst = src | imm.
+int orr_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
+{
+  return dop_imm(codebuf, DP_ORR, dst, src, imm);
+}
+
+// Compare src with imm (flags only); 16-bit Thumb form for small cases.
+int cmp_imm(CodeBuf *codebuf, Reg src, u32 imm)
+{
+  if (Thumb2 && src < ARM_R8 && imm < 256)
+    return out_16(codebuf, T_CMP_IMM(src, imm));
+  return dop_imm(codebuf, DP_CMP, 0x0f, src, imm);
+}
+
+// Test src & imm (flags only).
+int tst_imm(CodeBuf *codebuf, Reg src, u32 imm)
+{
+  return dop_imm(codebuf, DP_TST, 0x0f, src, imm);
+}
+
+// Emit a full data memory barrier (DMB with option 0xf = SY) when running
+// on a multiprocessor; no barrier is needed on a uniprocessor.
+int fullBarrier(CodeBuf *codebuf)
+{
+  if (os::is_MP()) {
+    if (Thumb2)
+      return out_16x2(codebuf, T_DMB(0xf));
+    return out_32(codebuf, A_DMB(0xf));
+  }
+  // Bug fix: previously fell off the end of this value-returning function
+  // on uniprocessor systems (undefined behaviour). Return 0 explicitly.
+  return 0;
+}
+
+// Emit a store memory barrier (DMB with option 0xe = ST) when running on
+// a multiprocessor; no barrier is needed on a uniprocessor.
+int storeBarrier(CodeBuf *codebuf)
+{
+  if (os::is_MP()) {
+    if (Thumb2)
+      return out_16x2(codebuf, T_DMB(0xe));
+    return out_32(codebuf, A_DMB(0xe));
+  }
+  // Bug fix: previously fell off the end of this value-returning function
+  // on uniprocessor systems (undefined behaviour). Return 0 explicitly.
+  return 0;
+}
+
+// Emit a table-branch-halfword: branch by the halfword table entry at
+// [base, idx]. Native TBH exists only in Thumb-2; ARM mode emulates it
+// with an explicit load and add to PC (sequence below).
+int tbh(CodeBuf *codebuf, Reg base, Reg idx)
+{
+  if (Thumb2)
+    return out_16x2(codebuf, T_TBH(base, idx));
+  // For the moment we emulate the behaviour of TBH in arm code
+  // It may be better to review the tableswitch generation sometime
+  //   	lsl	ip, idx, #1
+  //	ldrh	ip, [pc, ip]
+  //	add	pc, pc, ip, lsl #1
+  lsl_imm(codebuf, ARM_IP, idx, 1);
+  ldrh_reg(codebuf, ARM_IP, ARM_PC, ARM_IP, 1, 0);
+  // return add_reg(codebuf, ARM_PC, ARM_PC, ARM_IP);
+  return dop_reg(codebuf, DP_ADD, ARM_PC, ARM_PC, ARM_IP, SHIFT_LSL, 1);
+}
+
+// Unsigned 32x32->64 multiply: res_hi:res_lo = lho * rho.
+int umull(CodeBuf *codebuf, u32 res_lo, u32 res_hi, u32 lho, u32 rho)
+{
+  return out_armthumb32(codebuf, Thumb2 ? T_UMULL(res_lo, res_hi, lho, rho) :
+                                      A_UMULL(res_lo, res_hi, lho, rho));
+}
+
+// Multiply-accumulate: res = lho * rho + a.
+int mla(CodeBuf *codebuf, u32 res, u32 lho, u32 rho, u32 a)
+{
+  return out_armthumb32(codebuf, Thumb2 ? T_MLA(res, lho, rho, a) :
+                                      A_MLA(res, lho, rho, a));
+}
+
+// Negate a condition code by flipping its low bit (EQ<->NE, CS<->CC, ...).
+#define NEG_COND(cond)	((cond) ^ 1)
+
+// Thumb/Thumb-2 branch encoders. 'uoff' is the branch offset in
+// halfwords, already biased for the PC read (see branch_uncond). The
+// wide T_BW/T_BL/T_BLX forms scatter the offset across the immediate,
+// J1 and J2 fields of the 32-bit encoding (hence the XOR terms).
+#define T_B(uoff)	(0xe000 | ((uoff) & 0x7ff))
+#define T_BW(uoff)	(0xf0009000 | \
+			  (((uoff) & (1<<23)) << (26-23)) | \
+			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
+			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
+			  (((uoff) & 0x1ff800) << (16-11)) | \
+			  ((uoff) & 0x7ff))
+#define T_BL(uoff)	(0xf000d000 | \
+			  (((uoff) & (1<<23)) << (26-23)) | \
+			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
+			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
+			  (((uoff) & 0x1ff800) << (16-11)) | \
+			  ((uoff) & 0x7ff))
+#define T_BLX(uoff)	(0xf000c000 | \
+			  (((uoff) & (1<<23)) << (26-23)) | \
+			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
+			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
+			  (((uoff) & 0x1ff800) << (16-11)) | \
+			  ((uoff) & 0x7ff))
+#define T_BCC(cond, uoff) (0xd000 | (conds[cond] << 8) | ((uoff) & 0xff))
+#define T_BCCW(cond, uoff) (0xf0008000 | \
+			     (conds[cond] << 22) | \
+			     (((uoff) & (1<<19)) << (26-19)) | \
+			     (((uoff) & (1<<18)) >> (18-11)) | \
+			     (((uoff) & (1<<17)) >> (17-13)) | \
+			     (((uoff) & 0x1f800) << (16-11)) | \
+			     ((uoff) & 0x7ff))
+#define T_BLX_REG(r)	(0x4780 | ((r) << 3))
+#define T_CBZ(r, uoff)	(0xb100 | (((uoff) & 0x1f) << 3) | (((uoff) & 0x20) << (8-5)) | ((r) & 7))
+#define T_CBNZ(r, uoff)	(0xb900 | (((uoff) & 0x1f) << 3) | (((uoff) & 0x20) << (8-5)) | ((r) & 7))
+
+// ARM branch encoders. 'uoff' is the branch offset in words.
+#define A_B(uoff)	 (0xea000000 | ((uoff) & 0xffffff))
+#define A_BL(cond, uoff) (0x0b000000 | (conds[cond] << 28) | ((uoff) & 0xffffff))
+#define A_BCC(cond, uoff) (0x0a000000 | (conds[cond] << 28) | ((uoff) & 0xffffff))
+#define A_BLX_REG(r)     (0xe12fff30 | (r))
+
+// Temporarily rewind the code buffer to halfword location 'loc' so
+// already-emitted code can be overwritten; close the region with HCTAP
+// ("PATCH" reversed), which restores the saved output position.
+#define PATCH(loc)	do {						\
+	  unsigned oldidx = codebuf->idx;				\
+	  codebuf->idx = (loc) >> 1;					\
+
+#define HCTAP								\
+	  codebuf->idx = oldidx;					\
+    	} while (0)
+
+// Reserve space for a forward branch whose target is not yet known,
+// filling it with an UNDEFINED placeholder. Returns the location to be
+// patched later (by branch_*_patch / bcc_patch).
+int forward_short(CodeBuf *codebuf)
+{
+  int loc = out_loc(codebuf);
+  if (Thumb2) out_16(codebuf, UNDEFINED_16);
+  else out_32(codebuf, UNDEFINED_32);
+  return loc;
+}
+
+// Reserve a 32-bit slot for a forward branch (wide form in both ISAs).
+int forward_long(CodeBuf *codebuf)
+{
+  int loc = out_loc(codebuf);
+  out_32(codebuf, UNDEFINED_32);
+  return loc;
+}
+
+// Reserve space for a forward compare-and-branch (cbz/cbnz_patch).
+// Thumb-2 needs one 16-bit CBZ/CBNZ slot; ARM has no CBZ, so two words
+// are reserved for the CMP + conditional branch emulation.
+int forward_cb(CodeBuf *codebuf)
+{
+  int loc = out_loc(codebuf);
+  if (Thumb2) return forward_short(codebuf);
+  out_32(codebuf, UNDEFINED_32);
+  out_32(codebuf, UNDEFINED_32);
+  return loc;
+}
+
+int branch_uncond(CodeBuf *codebuf, unsigned dest)
+{
+  unsigned loc = (codebuf->idx * 2);
+  int offset;
+  unsigned uoff;
+
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<10) && offset < (1<<10))
+      return out_16(codebuf, T_B(uoff));
+    if (offset >= -(1<<23) && offset < (1<<23))
+      return out_16x2(codebuf, T_BW(uoff));
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<22) && offset < (1<<22)) 
+      return out_32(codebuf, A_B(uoff));
+  }
+  J_Unimplemented();
+}
+
+// Patch the reserved slot at 'loc' (see forward_short/forward_long) with
+// an unconditional branch to 'dest'. Always uses the wide encoding in
+// Thumb-2, since the slot size was fixed when it was reserved. Restores
+// the output position before returning.
+int branch_uncond_patch(CodeBuf *codebuf, unsigned loc, unsigned dest)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  int rc;
+
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    if (offset >= -(1<<23) && offset < (1<<23)) {
+      uoff = offset & ((1<<24)-1);
+      rc = out_16x2(codebuf, T_BW(uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    if (offset >= -(1<<22) && offset < (1<<22)) {
+      uoff = offset;
+      rc = out_32(codebuf, A_B(uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  }
+  J_Unimplemented();
+}
+
+// Patch the slot at 'loc' with a branch to the CURRENT output position,
+// using the narrow (16-bit) Thumb encoding; ARM mode delegates to
+// branch_uncond_patch. Aborts if the span does not fit the narrow form.
+int branch_narrow_patch(CodeBuf *codebuf, unsigned loc)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  if (!Thumb2) return branch_uncond_patch(codebuf, loc, dest);
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  loc += 4;
+  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+  dest >>= 1;
+  loc >>= 1;
+  offset = dest - loc;
+  uoff = offset;
+  if (offset >= -(1<<10) && offset < (1<<10)) {
+    rc = out_16(codebuf, T_B(uoff));
+    codebuf->idx = oldidx;
+    return rc;
+  }
+  J_Unimplemented();
+}
+
+// Emit a conditional branch to byte offset 'dest'. Thumb-2 selects the
+// 16-bit or 32-bit conditional branch by range; ARM always has a 24-bit
+// word offset. PC-read bias and scaling as in branch_uncond.
+int branch(CodeBuf *codebuf, unsigned cond, unsigned dest)
+{
+  unsigned loc = (codebuf->idx * 2);
+  int offset;
+  unsigned uoff;
+
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<7) && offset < (1<<7)) {
+      return out_16(codebuf, T_BCC(cond, uoff));
+    }
+    if (offset >= -(1<<19) && offset < (1<<19)) {
+      return out_16x2(codebuf, T_BCCW(cond, uoff));
+    }
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<22) && offset < (1<<22)) 
+      return out_32(codebuf, A_BCC(cond, uoff));
+  }
+  J_Unimplemented();
+}
+
+// Patch the reserved slot at 'loc' with a conditional branch to the
+// CURRENT output position. Thumb-2 uses the narrow conditional branch
+// (the slot was reserved with forward_short); restores the output
+// position before returning.
+int bcc_patch(CodeBuf *codebuf, unsigned cond, unsigned loc)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest-loc;
+    if (offset >= -(1<<7) && offset < (1<<7)) {
+      uoff = offset;
+      rc = out_16(codebuf, T_BCC(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  } else {
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    if (offset >= -(1<<22) && offset < (1<<22)) {
+      uoff = offset;
+      rc = out_32(codebuf, A_BCC(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  }
+  J_Unimplemented();
+}
+
+// Emit a branch-with-link (call) to absolute address 'dest'. Unlike the
+// branch functions above, the offset is computed from the instruction's
+// absolute address in the buffer. Thumb-2 has no conditional BL, so a
+// non-AL condition is realized with a preceding IT block; ARM encodes
+// the condition directly.
+int bl(CodeBuf *codebuf, unsigned dest, unsigned cond = COND_AL)
+{
+  int offset;
+  unsigned uoff;
+
+  if (Thumb2) {
+    if (cond != COND_AL) it(codebuf, cond, IT_MASK_T);
+    unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 4;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    if (offset >= -(1<<23) && offset < (1<<23)) {
+      uoff = offset;
+      return out_16x2(codebuf, T_BL(uoff));
+    }
+  } else {
+    unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 8;
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    uoff = offset;
+    if (offset >= -(1<<22) && offset < (1<<22))
+      return out_32(codebuf, A_BL(cond, uoff));
+  }
+  J_Unimplemented();
+}
+
+// Emit a call to ARM-state code at absolute, word-aligned address 'dest'.
+// From Thumb this is BLX (which switches to ARM state); when already
+// generating ARM code a plain BL suffices.
+int blx(CodeBuf *codebuf, unsigned dest)
+{
+  unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 4;
+  int offset;
+  unsigned uoff;
+
+  if (!Thumb2) return bl(codebuf, dest); // Already in ARM
+  JASSERT((dest & 3) == 0 && (loc & 1) == 0, "unaligned code");
+  dest >>= 1;
+  loc >>= 1;
+  // BLX offsets are relative to the word-aligned PC.
+  loc &= ~1;
+  offset = dest - loc;
+  if (offset >= -(1<<23) && offset < (1<<23)) {
+    uoff = offset;
+    return out_16x2(codebuf, T_BLX(uoff));
+  }
+  J_Unimplemented();
+}
+
+// Patch the reserved slot at 'loc' with a conditional branch to 'dest'.
+// Thumb-2 always uses the wide conditional branch (the slot was reserved
+// with forward_long); restores the output position before returning.
+int branch_patch(CodeBuf *codebuf, unsigned cond, unsigned loc, unsigned dest)
+{
+  int offset;
+  unsigned uoff;
+  unsigned oldidx;
+  int rc;
+
+  oldidx = codebuf->idx;
+  if (Thumb2) {
+    codebuf->idx = loc >> 1;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    loc += 4;
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest - loc;
+    if (offset >= -(1<<19) && offset < (1<<19)) {
+      uoff = offset & ((1<<20)-1);
+      rc = out_16x2(codebuf, T_BCCW(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  } else {
+    codebuf->idx = loc >> 1;
+    JASSERT((dest & 3) == 0 && (loc & 3) == 0, "unaligned code");
+    loc += 8;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    if (offset >= -(1<<22) && offset < (1<<22)) {
+      uoff = offset;
+      rc = out_32(codebuf, A_BCC(cond, uoff));
+      codebuf->idx = oldidx;
+      return rc;
+    }
+  }
+  J_Unimplemented();
+}
+
+// Emit an indirect call through register r (BLX r).
+int blx_reg(CodeBuf *codebuf, Reg r)
+{
+  if (!Thumb2)
+    return out_32(codebuf, A_BLX_REG(r));
+  return out_16(codebuf, T_BLX_REG(r));
+}
+
+// Patch the slot at 'loc' (reserved with forward_cb) with a
+// compare-and-branch-if-zero on register r to the CURRENT output
+// position. Thumb-2 uses the dedicated CBZ encoding; ARM has no CBZ, so
+// CMP r, #0 plus a conditional branch fill the two reserved words (hence
+// the +12 PC bias: +4 for the CMP, +8 for the ARM PC read-ahead).
+int cbz_patch(CodeBuf *codebuf, Reg r, unsigned loc)
+{
+  unsigned offset;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    loc += 4;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest-loc;
+    JASSERT(r < ARM_R8 && offset < 64, "must be");
+    rc = out_16(codebuf, T_CBZ(r, offset));
+  } else {
+    cmp_imm(codebuf, r, 0);
+    loc += 12;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    JASSERT(offset < (1<<22), "???");
+    rc = out_32(codebuf, A_BCC(COND_EQ, offset));
+  }
+  codebuf->idx = oldidx;
+  // Bug fix: this int function previously fell off the end without
+  // returning ('rc' was declared but never used) - undefined behaviour.
+  return rc;
+}
+
+// Patch the slot at 'loc' (reserved with forward_cb) with a
+// compare-and-branch-if-non-zero on register r to the CURRENT output
+// position. Mirror image of cbz_patch: Thumb-2 CBNZ, or ARM CMP + BNE.
+int cbnz_patch(CodeBuf *codebuf, Reg r, unsigned loc)
+{
+  unsigned offset;
+  unsigned oldidx;
+  unsigned dest;
+  int rc;
+
+  dest = codebuf->idx * 2;
+  oldidx = codebuf->idx;
+  codebuf->idx = loc >> 1;
+  if (Thumb2) {
+    loc += 4;
+    JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
+    dest >>= 1;
+    loc >>= 1;
+    offset = dest-loc;
+    JASSERT(r < ARM_R8 && offset < 64, "must be");
+    rc = out_16(codebuf, T_CBNZ(r, offset));
+  } else {
+    cmp_imm(codebuf, r, 0);
+    loc += 12;
+    dest >>= 2;
+    loc >>= 2;
+    offset = dest - loc;
+    JASSERT(offset < (1<<22), "???");
+    rc = out_32(codebuf, A_BCC(COND_NE, offset));
+  }
+  codebuf->idx = oldidx;
+  // Bug fix: this int function previously fell off the end without
+  // returning ('rc' was declared but never used) - undefined behaviour.
+  return rc;
+}
+
+// Array bounds check: compare idx against size and call the
+// H_ARRAYBOUND handler if idx >= size (unsigned, COND_CS).
+int chka(CodeBuf *codebuf, u32 size, u32 idx)
+{
+  cmp_reg(codebuf, idx, size);
+  // Bug fix: this int function previously fell off the end without
+  // returning a value (undefined behaviour); return the bl result.
+  return bl(codebuf, handlers[H_ARRAYBOUND], COND_CS);
+}
+
+//-----------------------------------------------------------------------------------
+
+// An example of some debugging logic that you can use to trigger a
+// breakpoint when a particular method is executing.
+// EQ: prefix match of S1 against string literal S2 (false if S1 is NULL).
+#define EQ(S1, S2) (S1 && (strncmp(S1, S2, strlen(S2)) == 0))
+// Called from generated/interpreter code; set a debugger breakpoint on
+// the "nop" below to stop inside the method named in the EQ test.
+extern "C" void Debug(interpreterState istate)
+{
+  char valuebuf[8192];
+  istate->method()->name_and_sig_as_C_string(valuebuf, sizeof valuebuf);
+  if (EQ(valuebuf, "java.util.Hashtable.get(Ljava/lang/Object;)")
+      // && istate->method()->bci_from(istate->bcp()) == 45
+      ) {
+    asm("nop");
+  }
+}
+#undef EQ
+
+// Push nregs registers (regs[0] first) onto the Java expression stack
+// (Rstack). A single register uses STR with writeback. Because STM
+// stores registers in ascending numerical order, the run is flushed and
+// a new STM started whenever the next register would break the strictly
+// descending order required within one chunk.
+void Thumb2_Push_Multiple(CodeBuf *codebuf, Reg *regs, unsigned nregs)
+{
+  unsigned regset = 0;
+  unsigned regmask;
+  unsigned i;
+  Reg r;
+
+  JASSERT(nregs > 0, "nregs must be > 0");
+  if (nregs == 1) {
+    str_imm_wb(codebuf, regs[0], Rstack, -4, 1, 1);
+    return;
+  }
+  for (i = 0; i < nregs; i++) {
+    r = regs[i];
+    if (!IS_ARM_INT_REG(r)) J_Unimplemented();
+    regmask = 1<<r;
+    // regset & -regset isolates the lowest register accumulated so far.
+    if (regset != 0 && regmask >= (regset & -regset)) {
+      stm(codebuf, regset, Rstack, PUSH_FD, 1);
+      regset = 0;
+    }
+    regset |= regmask;
+  }
+  stm(codebuf, regset, Rstack, PUSH_FD, 1);
+}
+
+// Pop nregs registers off the Java expression stack (Rstack), inverse of
+// Thumb2_Push_Multiple: iterates regs[] in reverse, flushing an LDM and
+// starting a new one whenever the next register would break the register
+// ordering required within one LDM chunk.
+void Thumb2_Pop_Multiple(CodeBuf *codebuf, Reg *regs, unsigned nregs)
+{
+  unsigned regset = 0;
+  unsigned regmask;
+  unsigned i;
+  Reg r;
+
+  if (nregs == 0)
+    return;
+  JASSERT(nregs > 0, "nregs must be > 0");
+  if (nregs == 1) {
+    ldr_imm_wb(codebuf, regs[0], Rstack, 4, 0, 1);
+    return;
+  }
+  i = nregs;
+  do {
+    i--;
+    r = regs[i];
+    if (!IS_ARM_INT_REG(r)) J_Unimplemented();
+    regmask = 1<<r;
+    if (regmask <= (regset & -regset)) {
+      ldm(codebuf, regset, Rstack, POP_FD, 1);
+      regset = 0;
+    }
+    regset |= regmask;
+  } while (i > 0);
+  ldm(codebuf, regset, Rstack, POP_FD, 1);
+}
+
+// Perform a parallel register move: for all i, dst[i] = src[i], where the
+// dst and src sets may overlap. Moves are ordered so no source is
+// clobbered before it is read; a cycle (no destination outside the live
+// source set) is broken by copying one source into IP first. Both dst[]
+// and src[] are modified in place as bookkeeping.
+int mov_multiple(CodeBuf *codebuf, Reg *dst, Reg *src, unsigned nregs)
+{
+  unsigned u, n, p;
+  unsigned smask = 0;
+  unsigned dmask = 0;
+  unsigned free_mask, free_reg;
+
+  // Drop the no-op moves up front.
+  for (u = 0, n = 0; u < nregs; u++) {
+    JASSERT(dst[u] != ARM_IP, "mov_multiple cannot be used for ARM_IP");
+    JASSERT(src[u] != ARM_IP, "mov_multiple cannot be used for ARM_IP");
+    if (dst[u] != src[u]) {
+      dst[n] = dst[u];
+      src[n++] = src[u];
+    }
+  }
+  while (n) {
+    // Find a reg which is in the dst reg set but not the src reg set
+    smask = 0;
+    dmask = 0;
+    for (u = 0; u < n; u++) {
+      smask |= (1 << src[u]);
+      dmask |= (1 << dst[u]);
+    }
+    free_mask = dmask & ~smask;
+    if (!free_mask) {
+      // No such reg => must use IP
+      Reg r = dst[0];
+      mov_reg(codebuf, ARM_IP, r);
+      for (u = 0; u < n; u++) {
+	if (src[u] == r) src[u] = ARM_IP;
+      }
+      smask ^= (1<<r) | (1<<ARM_IP);
+      free_mask = dmask & ~smask;
+      JASSERT(free_mask, "still no free reg after using ARM_IP?");
+    }
+    free_reg = LOG2(free_mask);
+    // Emit the move whose destination is now safe; compact the rest.
+    for (u = 0, p = 0; u < n; u++) {
+      if (dst[u] == free_reg) {
+	mov_reg(codebuf, dst[u], src[u]);
+      } else {
+	dst[p] = dst[u];
+	src[p++] = src[u];
+      }
+    }
+    n--;
+  }
+  return 0;
+}
+
+// Accessors for the simulated Java expression stack: TOS is the register
+// holding the top of stack, TOSM1..TOSM3 the next entries down.
+#define TOS(jstack)	((jstack)->stack[(jstack)->depth-1])
+#define TOSM1(jstack)	((jstack)->stack[(jstack)->depth-2])
+#define TOSM2(jstack)	((jstack)->stack[(jstack)->depth-3])
+#define TOSM3(jstack)	((jstack)->stack[(jstack)->depth-4])
+
+// Exchange the top two stack entries (register renaming only; no code is
+// emitted).
+#define SWAP(jstack) do { \
+		      Reg r = (jstack)->stack[(jstack)->depth-1]; \
+		      (jstack)->stack[(jstack)->depth-1] = (jstack)->stack[(jstack)->depth-2]; \
+		      (jstack)->stack[(jstack)->depth-2] = r; \
+		    } while (0)
+
+#define JSTACK_REG(jstack)		jstack_reg(jstack)
+#define JSTACK_PREFER(jstack, prefer)	jstack_prefer(jstack, prefer)
+
+// Push 'reg' onto the simulated stack and return it (no code emitted).
+int PUSH(Thumb2_Stack *jstack, unsigned reg) {
+  jstack->stack[jstack->depth] = reg;
+  jstack->depth++;
+  return reg;
+}
+
+// Pop and return the top register of the simulated stack (no code emitted).
+int POP(Thumb2_Stack *jstack) {
+  jstack->depth--;
+  return jstack->stack[jstack->depth];
+}
+
+// For a 4-bit register-usage mask, the index of the highest clear bit -
+// i.e. the highest-numbered free stack register (bit i = register i).
+// Entry 0xf (all in use) yields 0 but is guarded against by the callers.
+static const unsigned last_clear_bit[] = {
+	3,	//	0000
+	3,	//	0001
+	3,	//	0010
+	3,	//	0011
+	3,	//	0100
+	3,	//	0101
+	3,	//	0110
+	3,	//	0111
+	2,	//	1000
+	2,	//	1001
+	2,	//	1010
+	2,	//	1011
+	1,	//	1100
+	1,	//	1101
+	0,	//	1110
+	0,	//	1111 // No registers available...
+};
+
+#define LAST_CLEAR_BIT(mask) last_clear_bit[mask]
+
+// Diagnostic counter: number of times no free stack register was found.
+unsigned long thumb2_register_allocation_failures = 0;
+
+// Return a stack register (one of the STACK_REGS low registers) not
+// currently holding any value on the simulated expression stack.
+unsigned jstack_reg(Thumb2_Stack *jstack)
+{
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask = 0;
+  unsigned r;
+  unsigned i;
+
+  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
+  mask &= (1 << STACK_REGS) - 1;
+  if (mask >= (1 << STACK_REGS) - 1)  { // No free registers
+    thumb2_register_allocation_failures++;
+    J_BogusImplementation();
+  }
+  r = LAST_CLEAR_BIT(mask);
+  return r;
+}
+
+// Like jstack_reg, but 'prefer' is a bitmask of registers the caller
+// would like to receive. If at least one preferred register is free, all
+// non-preferred registers are added to the in-use mask so the allocation
+// comes from the preferred set; otherwise any free register is returned.
+// NOTE(review): the free-preferred test uses a hard-coded 0x0f rather
+// than (1 << STACK_REGS) - 1 — presumably STACK_REGS == 4; confirm.
+unsigned jstack_prefer(Thumb2_Stack *jstack, Reg prefer)
+{
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask = 0;
+  unsigned r;
+  unsigned i;
+
+  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
+  mask &= (1 << STACK_REGS) - 1;
+  if ((prefer & ~mask) & 0x0f) mask |= (~prefer & ((1 << STACK_REGS) - 1));
+  if (mask >= (1 << STACK_REGS) - 1)  { // No free registers
+    thumb2_register_allocation_failures++;
+    J_BogusImplementation();
+  }
+  r = LAST_CLEAR_BIT(mask);
+  return r;
+}
+
+// Ensure at least 'required' values of the Java expression stack are held
+// in registers. Existing entries are shifted up, free registers are
+// assigned to the vacated slots, and the values are loaded from the
+// in-memory stack with a single pop sequence.
+void Thumb2_Fill(Thumb2_Info *jinfo, unsigned required)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask = 0;
+  unsigned tofill;
+  unsigned r, i;
+
+  if (depth >= required) return;
+  tofill = required - depth;
+  // Shift the current entries up by 'tofill', collecting their usage mask.
+  for (i = depth; i > 0;) {
+    i--;
+    mask |= 1 << stack[i];
+    stack[i+tofill] = stack[i];
+  }
+  mask &= (1 << STACK_REGS) - 1;
+  // Assign a distinct free register to each new bottom slot.
+  for (i = 0; i < tofill; i++) {
+    JASSERT(mask != (1 << STACK_REGS) - 1, "Fill failed!!!");
+    r = LAST_CLEAR_BIT(mask);
+    mask |= (1 << r);
+    stack[i] = r;
+  }
+  jstack->depth = depth + tofill;
+  Thumb2_Pop_Multiple(jinfo->codebuf, stack, tofill);
+}
+
+// Population count for a 4-bit mask (number of set bits).
+static const unsigned bitcount[] = {
+	0,	// 0000
+	1,	// 0001
+	1,	// 0010
+	2,	// 0011
+	1,	// 0100
+	2,	// 0101
+	2,	// 0110
+	3,	// 0111
+	1,	// 1000
+	2,	// 1001
+	2,	// 1010
+	3,	// 1011
+	2,	// 1100
+	3,	// 1101
+	3,	// 1110
+	4,	// 1111
+};
+
+#define BITCOUNT(mask) bitcount[mask]
+
+// Thumb2_Spill:-
+// 	required - ensure that at least this many registers are available
+// 	exclude - bitmask, do not count these registers as available
+//
+// 	The no. of available regs (STACK_REGS) less the no. of registers in
+// 	exclude must be >= the number required, otherwise this function loops!
+//
+// 	Typical usage is
+//
+// 	Thumb2_Spill(jinfo, 2, 0);	// get 2 free regs
+// 	r_res_lo = PUSH(jinfo->jstack, JSTACK_REG(jinfo->jstack));
+// 	r_res_hi = PUSH(jinfo->jstack, JSTACK_REG(jinfo->jstack));
+//
+//	Use the exclude mask when you do not want a subsequent call to
+//	JSTACK_REG to return a particular register or registers. This can
+//	be useful, for example, with long (64) bit operations. Eg. In the
+//	following we use it to ensure that the hi inputs are not clobbered
+//	by the lo result as part of the intermediate calculation.
+//
+//	Thumb2_Fill(jinfo, 4);
+//	exclude = (1<<rho_hi)|(1<<lho_hi);
+//	rho_lo = POP(jstack);
+//	rho_hi = POP(jstack);
+//	lho_lo = POP(jstack);
+//	lho_hi = POP(jstack);
+//	Thumb2_Spill(jinfo, 2, exclude);
+//	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~exclude));	// != rho_hi or lho_hi
+//	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~exclude));	// != rho_hi or lho_hi
+//	dop_reg(jinfo->codebuf, DP_ADD, res_lo, lho_lo, rho_lo, SHIFT_LSL, 0); 
+//	dop_reg(jinfo->codebuf, DP_ADC, res_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
+//	
void Thumb2_Spill(Thumb2_Info *jinfo, unsigned required, unsigned exclude)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned *stack = jstack->stack;
  unsigned depth = jstack->depth;
  unsigned mask;
  unsigned i;
  unsigned tospill = 0;

  exclude &= (1 << STACK_REGS) - 1;
  // Fast path: enough registers free and no exclusions requested.
  if (depth <= (STACK_REGS - required) && exclude == 0) return;
  // Find the minimum number of bottom-of-stack elements to spill so
  // that the registers still cached, plus the excluded set, leave at
  // least `required` registers free.  As the header comment warns,
  // this loop does not terminate if the request is unsatisfiable.
  while (1) {
    mask = 0;
    for (i = tospill; i < depth; i++) mask |= 1 << stack[i];
    mask &= ((1 << STACK_REGS) - 1);
    mask |= exclude;
    if (STACK_REGS - BITCOUNT(mask) >= required) break;
    tospill++;
  }
  if (tospill == 0) return;
  // Emit the pushes, then slide the surviving elements down.
  Thumb2_Push_Multiple(jinfo->codebuf, stack, tospill);
  for (i = tospill; i < depth; i++)
    stack[i-tospill] = stack[i];
  jstack->depth = depth - tospill;
  JASSERT((int)jstack->depth >= 0, "Stack underflow");
}
+
+// Thumb2_Tmp:-
+// 	Allocate a temp reg for use in local code generation.
+// 	exclude is a bit mask of regs not to use.
+// 	A max of 2 regs can be guaranteed (ARM_IP & ARM_LR)
+// 	If allocating 2 regs you must include the reg you got the
+// 	first time in the exclude list. Otherwise you just get
+// 	the same reg again.
+Reg Thumb2_Tmp(Thumb2_Info *jinfo, unsigned exclude)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned mask;
+  unsigned i;
+
+  mask = 0;
+  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
+  mask |= exclude;
+  for (i = 0; i < STACK_REGS; i++)
+    if ((mask & (1<<i)) == 0) return i;
+  if ((mask & (1<<ARM_IP)) == 0) return ARM_IP;
+  if ((mask & (1<<ARM_LR)) == 0) return ARM_LR;
+  JASSERT(0, "failed to allocate a tmp reg");
+}
+
+void Thumb2_Flush(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  if (jstack->depth > 0)
+    Thumb2_Push_Multiple(jinfo->codebuf, jstack->stack, jstack->depth);
+  jstack->depth = 0;
+}
+
// SAVE_STACK and RESTORE_STACK save the stack state so that it's
// possible to do a stack flush to memory and restore that stack state
// to the same registers.
// NOTE(review): SAVE_STACK declares locals, so it may be used at most
// once per scope and must lexically precede the matching RESTORE_STACK.
#define SAVE_STACK(JSTACK)					\
  unsigned saved_stack_elements[JSTACK->depth];			\
  unsigned saved_stack_depth;					\
  memcpy(saved_stack_elements, JSTACK->stack,			\
	 JSTACK->depth * sizeof saved_stack_elements[0]);	\
  saved_stack_depth = JSTACK->depth;
// Pops the saved elements back into their original registers and
// reinstates the recorded depth.
#define RESTORE_STACK(JSTACK, CODEBUF)					\
  Thumb2_Pop_Multiple(CODEBUF, saved_stack_elements, saved_stack_depth); \
  memcpy(JSTACK->stack, saved_stack_elements,				\
	 JSTACK->depth * sizeof saved_stack_elements[0]);		\
  JSTACK->depth = saved_stack_depth;
+
+// Call this when we are about to corrupt a local
+// The local may already be on the stack
+// For example
+// 	iload	0
+// 	iconst	2
+// 	istore	0
+// 	istore	1
+// Without this check the code generated would be (r4 is local 0, r5 is local 1)
+// 	mov	r4, #2
+//	mov	r5, r4
+// With this check the code should be
+// 	mov	r3, r4
+// 	mov	r4, #2
+// 	mov	r5, r3
+// This is not ideal, but is better than the previous:-)
+//
+void Thumb2_Corrupt(Thumb2_Info *jinfo, unsigned r, unsigned ignore)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned *stack = jstack->stack;
+  unsigned depth = jstack->depth;
+  unsigned r_new, mask;
+  unsigned i;
+
+  if (ignore >= depth) return;
+//  JASSERT(depth >= ignore, "Cant ignore more than the whole stack!!");
+  if (IS_SREG(r)) return;
+  depth -= ignore;
+  for (i = 0; i < depth; i++) {
+    if (r == stack[i]) {
+      Thumb2_Spill(jinfo, 1, 0);
+      depth = jstack->depth - ignore;
+      r_new = JSTACK_REG(jstack);
+      mov_reg(jinfo->codebuf, r_new, r);
+      for (i = 0; i < depth; i++) if (r == stack[i]) stack[i] = r_new;
+      break;
+    }
+  }
+}
+
+unsigned Thumb2_ResultLocal(Thumb2_Info *jinfo, unsigned bci)
+{
+  unsigned opc = jinfo->code_base[bci];
+  if (jinfo->bc_stackinfo[bci] & BC_BRANCH_TARGET) return 0;
+  if (opc < opc_istore || opc > opc_astore_3) return 0;
+  if (opc == opc_istore || opc == opc_fstore || opc == opc_astore)
+    return jinfo->jregs->r_local[jinfo->code_base[bci+1]];
+  if ((opc >= opc_istore_0 && opc <= opc_istore_3) ||
+	(opc >= opc_fstore_0 && opc <= opc_fstore_3) ||
+	(opc >= opc_astore_0 && opc <= opc_astore_3))
+    return jinfo->jregs->r_local[(opc-opc_istore_0)&3];
+  return 0;
+}
+
// Map Java arithmetic bytecodes (indexed from opc_iadd; the int, long,
// float and double variants are interleaved in bytecode order) to ARM
// data-processing or VFP opcodes.  Zero entries have no single machine
// equivalent and are handled specially by the callers.
static const unsigned char dOps[] = {
	DP_ADD, DP_ADC, VP_ADD, VP_ADD,
	DP_SUB, DP_SBC, VP_SUB, VP_SUB,
	DP_MUL, 0, VP_MUL, VP_MUL,
	0, 0, VP_DIV, VP_DIV,
	0, 0, 0, 0,
	0, 0, 0, 0,
	DP_LSL, 0,
	DP_ASR, 0,
	DP_LSR, 0,
	DP_AND, DP_AND, DP_ORR, DP_ORR, DP_EOR, DP_EOR,
};
+
+unsigned Thumb2_Imm(Thumb2_Info *jinfo, unsigned imm, unsigned next_bci)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r;
+  unsigned next_op;
+
+  if (!(jinfo->bc_stackinfo[next_bci] & BC_BRANCH_TARGET)) {
+    next_op = jinfo->code_base[next_bci];
+    if (next_op > OPC_LAST_JAVA_OP) {
+      if (Bytecodes::is_defined((Bytecodes::Code)next_op))
+	next_op = (unsigned)Bytecodes::java_code((Bytecodes::Code)next_op);
+    }
+    switch (next_op) {
+      case opc_istore:
+      case opc_fstore:
+      case opc_astore: {
+	unsigned local = jinfo->code_base[next_bci+1];
+	r = jinfo->jregs->r_local[local];
+	if (r) {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  mov_imm(jinfo->codebuf, r, imm);
+	  return 2;
+	}
+	break;
+      }
+      case opc_istore_0:
+      case opc_istore_1:
+      case opc_istore_2:
+      case opc_istore_3:
+      case opc_fstore_0:
+      case opc_fstore_1:
+      case opc_fstore_2:
+      case opc_fstore_3:
+      case opc_astore_0:
+      case opc_astore_1:
+      case opc_astore_2:
+      case opc_astore_3: {
+	unsigned local = (jinfo->code_base[next_bci]-opc_istore_0) & 3;
+	r = jinfo->jregs->r_local[local];
+	if (r) {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  mov_imm(jinfo->codebuf, r, imm);
+	  return 1;
+	}
+	break;
+      }
+      case opc_iadd:
+      case opc_isub:
+      case opc_ishl:
+      case opc_ishr:
+      case opc_iushr:
+      case opc_iand:
+      case opc_ior:
+      case opc_ixor: {
+	unsigned len = 0;
+	unsigned r_lho;
+
+	Thumb2_Fill(jinfo, 1);
+	r_lho = POP(jstack);
+
+	r = Thumb2_ResultLocal(jinfo, next_bci+1);
+	if (r) {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  len = Bytecodes::length_for((Bytecodes::Code)jinfo->code_base[next_bci+1]);
+	} else {
+	  Thumb2_Spill(jinfo, 1, 0);
+	  r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	}
+	if (next_op == opc_ishl || next_op == opc_ishr || next_op == opc_iushr)
+	  shift_imm(jinfo->codebuf, dOps[next_op-opc_iadd], r, r_lho, imm);
+	else
+	  dop_imm(jinfo->codebuf, dOps[next_op-opc_iadd], r, r_lho, imm);
+	return 1+len;
+      }
+
+      case opc_idiv: {
+	unsigned len = 0;
+	unsigned r_lho;
+	unsigned abs_imm = abs((int)imm);
+
+	if ((imm & -imm) == abs_imm) {
+	  unsigned l2_imm = LOG2(abs_imm);
+	  unsigned r_lho;
+
+	  if (imm == 0) break;
+	  if (imm == 1) return 1;
+
+	  Thumb2_Fill(jinfo, 1);
+	  r_lho = POP(jstack);
+
+	  r = Thumb2_ResultLocal(jinfo, next_bci+1);
+	  if (r) {
+	    Thumb2_Corrupt(jinfo, r, 0);
+	    len = Bytecodes::length_for((Bytecodes::Code)jinfo->code_base[next_bci+1]);
+	  } else {
+	    Thumb2_Spill(jinfo, 1, 0);
+	    r = JSTACK_REG(jstack);
+	    PUSH(jstack, r);
+	  }
+
+	  if (abs_imm != 1) {
+	    unsigned r_tmp = r_lho;
+	    if (abs_imm != 2) {
+	      r_tmp = Thumb2_Tmp(jinfo, (1<<r_lho));
+	      asr_imm(jinfo->codebuf, r_tmp, r_lho, 31);
+	    }
+	    add_reg_shift(jinfo->codebuf, r, r_lho, r_tmp, SHIFT_LSR, 32-l2_imm);
+	    asr_imm(jinfo->codebuf, r, r, l2_imm);
+	  }
+	  if ((int)imm < 0)
+	    rsb_imm(jinfo->codebuf, r, r, 0);
+	  return 1+len;
+	}
+	break;
+      }
+    }
+  }
+  Thumb2_Spill(jinfo, 1, 0);
+  r = JSTACK_REG(jstack);
+  PUSH(jstack, r);
+  mov_imm(jinfo->codebuf, r, imm);
+  return 0;
+}
+
+void Thumb2_ImmX2(Thumb2_Info *jinfo, unsigned lo, unsigned hi)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+
+  Thumb2_Spill(jinfo, 2, 0);
+  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+  mov_imm(jinfo->codebuf, r_lo, lo);
+  mov_imm(jinfo->codebuf, r_hi, hi);
+}
+
// Byte offset of Java local `local` from Rstack: locals sit above the
// frame, highest-numbered local at the lowest address, with `stackdepth`
// memory-resident stack words below the frame.
#define LOCAL_OFFSET(local, stackdepth, nlocals) ((stackdepth)*4 + FRAME_SIZE + ((nlocals)-1-(local))*4)
// Base register / extra offset for addressing the istate: Ristate when
// it is pinned in a register, otherwise computed relative to Rstack.
#define ISTATE_REG(jinfo)	  ((jinfo)->use_istate ? Ristate : Rstack)
#define ISTATE(jinfo, stackdepth) ((jinfo)->use_istate ? 0 : (((stackdepth)-(jinfo)->jstack->depth)*4))
#define ISTATE_OFFSET(jinfo, stackdepth, offset) (ISTATE(jinfo, stackdepth) + (offset))
+
+void load_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  if (jinfo->use_istate)
+    ldr_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4);
+  else
+    ldr_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals));
+}
+
+void store_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  if (jinfo->use_istate)
+    str_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4);
+  else
+    str_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals));
+}
+
+void load_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
+{
+  if (jinfo->use_istate)
+    ldr_imm(jinfo->codebuf, r, Ristate, istate_offset);
+  else
+    ldr_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset));
+}
+
+void store_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
+{
+  if (jinfo->use_istate)
+    str_imm(jinfo->codebuf, r, Ristate, istate_offset);
+  else
+    str_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset));
+}
+
+void Thumb2_Load(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r;
+
+  r = jinfo->jregs->r_local[local];
+  if (r) {
+    PUSH(jstack, r);
+  } else {
+    int nlocals = jinfo->method->max_locals();
+
+    Thumb2_Spill(jinfo, 1, 0);
+    JASSERT(stackdepth >= jstack->depth, "negative stack offset?");
+    stackdepth -= jstack->depth;
+    r = JSTACK_REG(jstack);
+    PUSH(jstack, r);
+    load_local(jinfo, r, local, stackdepth);
+  }
+}
+
+void Thumb2_LoadX2(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  int nlocals = jinfo->method->max_locals();
+
+  r_hi = jinfo->jregs->r_local[local];
+  if (r_hi) {
+    r_lo = jinfo->jregs->r_local[local+1];
+    if (r_lo) {
+      PUSH(jstack, r_hi);
+      PUSH(jstack, r_lo);
+    } else {
+      Thumb2_Spill(jinfo, 1, 0);
+      stackdepth -= jstack->depth;
+      PUSH(jstack, r_hi);
+      r_lo = PUSH(jstack, JSTACK_REG(jstack));
+      load_local(jinfo, r_lo, local+1, stackdepth);
+    }
+  } else {
+    r_lo = jinfo->jregs->r_local[local+1];
+    if (r_lo) {
+      Thumb2_Spill(jinfo, 1, 0);
+      stackdepth -= jstack->depth;
+      r_hi = PUSH(jstack, JSTACK_REG(jstack));
+      load_local(jinfo, r_hi, local, stackdepth);
+      PUSH(jstack, r_lo);
+    } else {
+      Thumb2_Spill(jinfo, 2, 0);
+      stackdepth -= jstack->depth;
+      r_hi = PUSH(jstack, JSTACK_REG(jstack));
+      r_lo = PUSH(jstack, JSTACK_REG(jstack));
+      load_local(jinfo, r_hi, local, stackdepth);
+      load_local(jinfo, r_lo, local+1, stackdepth);
+    }
+  }
+}
+
+void Thumb2_Store(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r, r_local;
+  int nlocals = jinfo->method->max_locals();
+
+  Thumb2_Fill(jinfo, 1);
+  stackdepth -= jstack->depth;
+  r = POP(jstack);
+  r_local = jinfo->jregs->r_local[local];
+  if (r_local) {
+    Thumb2_Corrupt(jinfo, r_local, 0);
+    mov_reg(jinfo->codebuf, r_local, r);
+  } else {
+    store_local(jinfo, r, local, stackdepth);
+  }
+}
+
+void Thumb2_StoreX2(Thumb2_Info *jinfo, int local, unsigned stackdepth)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  unsigned r_local_lo, r_local_hi;
+  int nlocals = jinfo->method->max_locals();
+
+  Thumb2_Fill(jinfo, 2);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  stackdepth -= 2;
+
+  r_local_hi = jinfo->jregs->r_local[local];
+  if (r_local_hi) {
+    Thumb2_Corrupt(jinfo, r_local_hi, 0);
+    mov_reg(jinfo->codebuf, r_local_hi, r_hi);
+  } else {
+    store_local(jinfo, r_hi, local, stackdepth-jstack->depth);
+  }
+
+  r_local_lo = jinfo->jregs->r_local[local+1];
+  if (r_local_lo) {
+    Thumb2_Corrupt(jinfo, r_local_lo, 0);
+    mov_reg(jinfo->codebuf, r_local_lo, r_lo);
+  } else {
+    store_local(jinfo, r_lo, local+1, stackdepth-jstack->depth);
+  }
+}
+
+void Thumb2_Xaload(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_index, r_array, r_value;
+  unsigned op = opc - (unsigned)opc_iaload;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 2);
+  r_index = POP(jstack);
+  r_array = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index));
+  r_value = JSTACK_REG(jstack);
+  PUSH(jstack, r_value);
+  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
+  chka(jinfo->codebuf, r_tmp, r_index);
+  if (opc == opc_baload) {
+    add_reg(jinfo->codebuf, r_tmp, r_array, r_index);
+    ldrsb_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else if (opc == opc_caload) {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
+    ldrh_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else if (opc == opc_saload) {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
+    ldrsh_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 2);
+    ldr_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  }
+}
+
// Array load for 64-bit elements (laload/daload): pops index and array
// reference, bounds-checks, and pushes the two result words loaded
// with ldrd (element data begins at offset 16 for 8-byte elements).
void Thumb2_X2aload(Thumb2_Info *jinfo)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned r_index, r_array, r_lo, r_hi;
  unsigned r_tmp;

  Thumb2_Fill(jinfo, 2);
  r_index = POP(jstack);
  r_array = POP(jstack);
  Thumb2_Spill(jinfo, 2, 0);
  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index));
  r_hi = PUSH(jstack, JSTACK_REG(jstack));
  r_lo = PUSH(jstack, JSTACK_REG(jstack));
  // Load the array length and bounds-check the index.
  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
  chka(jinfo->codebuf, r_tmp, r_index);
  add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 3);
  // NOTE(review): LDRD_PRE presumably fixes up the register pair for
  // the ldrd encoding constraints -- confirm against its definition.
  LDRD_PRE(jstack, r_lo, r_hi);
  ldrd_imm(jinfo->codebuf, r_lo, r_hi, r_tmp, 16);
}
+
+void Thumb2_Xastore(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_value, r_index, r_array;
+  unsigned op = opc - (unsigned)opc_iastore;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 3);
+  r_value = POP(jstack);
+  r_index = POP(jstack);
+  r_array = POP(jstack);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index)|(1<<r_value));
+  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
+  chka(jinfo->codebuf, r_tmp, r_index);
+  if (opc == opc_bastore) {
+    add_reg(jinfo->codebuf, r_tmp, r_array, r_index);
+    strb_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else if (opc == opc_castore || opc == opc_sastore) {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
+    strh_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  } else {
+    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 2);
+    str_imm(jinfo->codebuf, r_value, r_tmp, 12);
+  }
+}
+
+void Thumb2_X2astore(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi, r_index, r_array;
+  unsigned r_tmp;
+
+  Thumb2_Fill(jinfo, 4);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  r_index = POP(jstack);
+  r_array = POP(jstack);
+  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index)|(1<<r_lo)|(1<<r_hi));
+  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8);
+  chka(jinfo->codebuf, r_tmp, r_index);
+  add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 3);
+  strd_imm(jinfo->codebuf, r_lo, r_hi, r_tmp, 16);
+}
+
+void Thumb2_Pop(Thumb2_Info *jinfo, unsigned n)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  while (n > 0 && jstack->depth > 0) {
+    POP(jstack);
+    n--;
+  }
+  if (n > 0) add_imm(jinfo->codebuf, Rstack, Rstack, n * 4);
+}
+
// Implement dup (n==0), dup_x1 (n==1) and dup_x2 (n==2) purely by
// renaming cached stack entries -- no code is emitted.  The top element
// is duplicated and the copy inserted n slots below it.
void Thumb2_Dup(Thumb2_Info *jinfo, unsigned n)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned *stack = jstack->stack;
  unsigned depth;
  unsigned i;

  Thumb2_Fill(jinfo, n+1);
  depth = jstack->depth;
  // Shift the top n+1 entries up one slot (stack[depth] is the spare
  // slot above the current top; the array is sized for this).
  for (i = 0; i <= n; i++)
    stack[depth-i] = stack[depth-i-1];
  // Re-insert the old top (now parked at stack[depth]) n+1 slots down.
  stack[depth-n-1] = stack[depth];
  jstack->depth = depth + 1;
}
+
// Implement dup2 (n==0), dup2_x1 (n==1) and dup2_x2 (n==2) by renaming
// cached stack entries -- no code is emitted.  The top two elements are
// duplicated and the copies inserted n slots below them.
void Thumb2_Dup2(Thumb2_Info *jinfo, unsigned n)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned *stack = jstack->stack;
  unsigned depth;
  unsigned i;

  Thumb2_Fill(jinfo, n+2);
  depth = jstack->depth;
  // Shift the top n+2 entries up two slots (stack[depth]/stack[depth+1]
  // are the spare slots above the current top).
  for (i = 0; i <= n+1; i++)
    stack[depth-i+1] = stack[depth-i-1];
  // Re-insert the old top pair, parked at stack[depth..depth+1].
  stack[depth-n-1] = stack[depth+1];
  stack[depth-n-2] = stack[depth];
  jstack->depth = depth + 2;
}
+
+void Thumb2_Swap(Thumb2_Info *jinfo)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  Thumb2_Fill(jinfo, 2);
+  SWAP(jstack);
+}
+
// Binary int operation with both operands on the stack (iadd..ixor,
// excluding idiv/irem which need special handling).  For the shift
// bytecodes the count is first masked to 0..31, matching Java shift
// semantics.
void Thumb2_iOp(Thumb2_Info *jinfo, u32 opc)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned r_lho, r_rho, r;

  Thumb2_Fill(jinfo, 2);
  r_rho = POP(jstack);
  r_lho = POP(jstack);
  Thumb2_Spill(jinfo, 1, 0);
  r = JSTACK_REG(jstack);
  PUSH(jstack, r);
  switch (opc) {
  case opc_ishl:
  case opc_ishr:
  case opc_iushr:
    {
      // Mask the shift count into a temp so the operand reg is intact.
      unsigned tmp_reg = Thumb2_Tmp(jinfo, 1 << r_lho | 1 << r_rho | 1 << r);
      and_imm(jinfo->codebuf, tmp_reg, r_rho, 31);
      r_rho = tmp_reg;
      break;
    }
  }
  dop_reg(jinfo->codebuf, dOps[opc-opc_iadd], r, r_lho, r_rho, 0, 0);
}
+
+void Thumb2_iNeg(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_src, r;
+
+  Thumb2_Fill(jinfo, 1);
+  r_src = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r = JSTACK_REG(jstack);
+  PUSH(jstack, r);
+  rsb_imm(jinfo->codebuf, r, r_src, 0);
+}
+
// lneg: negate the 64-bit top of stack as rsb/sbc.  The result-lo
// register is allocated with r_hi excluded so the rsb does not clobber
// the hi input before the sbc consumes it.
void Thumb2_lNeg(Thumb2_Info *jinfo, u32 opc)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned r_lo, r_hi, r_res_lo, r_res_hi;
  unsigned r_tmp;

  Thumb2_Fill(jinfo, 2);
  r_lo = POP(jstack);
  r_hi = POP(jstack);
  Thumb2_Spill(jinfo, 1, 0);
  r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
  Thumb2_Spill(jinfo, 1, (1<<r_hi));
  r_res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~(1<<r_hi)));
  JASSERT(r_res_lo != r_res_hi, "oops");
  JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
  // res_lo = 0 - lo; res_hi = 0 - hi - borrow (sbc with a zeroed temp).
  rsb_imm(jinfo->codebuf, r_res_lo, r_lo, 0);
  r_tmp = Thumb2_Tmp(jinfo, (1<<r_hi)|(1<<r_res_lo));
  mov_imm(jinfo->codebuf, r_tmp, 0);
  dop_reg(jinfo->codebuf, DP_SBC, r_res_hi, r_tmp, r_hi, SHIFT_LSL, 0);
}
+
+void Thumb2_fNeg(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r, r_result;
+
+  Thumb2_Fill(jinfo, 1);
+  r = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  r_result = PUSH(jstack, JSTACK_REG(jstack));
+  eor_imm(jinfo->codebuf, r_result, r, 0x80000000);
+}
+
// arm_op is either DP_EOR (for dnegate) or DP_BIC (for dabs)
// Unary double op on the sign bit of the hi word: EOR toggles it
// (negate), BIC clears it (abs).  The lo word is copied unchanged.
// res_lo excludes r_hi so the copy cannot clobber the hi input.
static void Thumb2_dUnaryOp(Thumb2_Info *jinfo, u32 arm_op)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned r_lo, r_hi, r_res_lo, r_res_hi;

  Thumb2_Fill(jinfo, 2);
  r_lo = POP(jstack);
  r_hi = POP(jstack);
  Thumb2_Spill(jinfo, 1, 0);
  r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
  Thumb2_Spill(jinfo, 1, (1<<r_hi));
  r_res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~(1<<r_hi)));
  JASSERT(r_res_lo != r_res_hi, "oops");
  JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
  mov_reg(jinfo->codebuf, r_res_lo, r_lo);
  dop_imm(jinfo->codebuf, arm_op, r_res_hi, r_hi, 0x80000000);
}
+
// dneg: toggle the sign bit of the double on top of the stack.
void Thumb2_dNeg(Thumb2_Info *jinfo)
{
  Thumb2_dUnaryOp(jinfo, DP_EOR);
}
+
// dabs (Math.abs intrinsic): clear the sign bit of the double on top
// of the stack.
void Thumb2_dAbs(Thumb2_Info *jinfo)
{
  Thumb2_dUnaryOp(jinfo, DP_BIC);
}
+
// Binary long op built from a lo/hi instruction pair (e.g. ladd =
// ADD+ADC, lsub = SUB+SBC, land/lor/lxor = same op twice; dOps holds
// the lo op at [opc-opc_ladd] and the hi op at [opc-opc_ladd+1]).
// res_lo must differ from both hi inputs so the first instruction
// cannot clobber operands of the second.
void Thumb2_lOp(Thumb2_Info *jinfo, u32 opc)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned res_lo, res_hi;
  unsigned lho_lo, lho_hi;
  unsigned rho_lo, rho_hi;

  Thumb2_Fill(jinfo, 4);
  rho_lo = POP(jstack);
  rho_hi = POP(jstack);
  lho_lo = POP(jstack);
  lho_hi = POP(jstack);
  Thumb2_Spill(jinfo, 1, 0);
  res_hi = PUSH(jstack, JSTACK_REG(jstack));
  Thumb2_Spill(jinfo, 1, (1<<lho_hi)|(1<<rho_hi));
  res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_hi)|(1<<rho_hi))));
  JASSERT(res_lo != rho_hi && res_lo != lho_hi, "res_lo != rho_hi && res_lo != lho_hi");
  dop_reg(jinfo->codebuf, dOps[opc-opc_ladd], res_lo, lho_lo, rho_lo, SHIFT_LSL, 0);
  dop_reg(jinfo->codebuf, dOps[opc-opc_ladd+1], res_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
}
+
// lmul: 64x64->64 multiply as umull + two mla's:
//   res = (rho_lo * lho_lo)            -- 64-bit partial product
//       + ((rho_lo * lho_hi) << 32)
//       + ((rho_hi * lho_lo) << 32)
// Temps are substituted for result regs that alias an operand so no
// operand is clobbered before its last use.
void Thumb2_lmul(Thumb2_Info *jinfo)
{
  Thumb2_Stack *jstack = jinfo->jstack;
  unsigned res_lo, res_hi;
  unsigned lho_lo, lho_hi;
  unsigned rho_lo, rho_hi;
  unsigned r_tmp_lo, r_tmp_hi;
  unsigned op_mask;

  Thumb2_Fill(jinfo, 4);
  rho_lo = POP(jstack);
  rho_hi = POP(jstack);
  lho_lo = POP(jstack);
  lho_hi = POP(jstack);
  op_mask = (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi);
  Thumb2_Spill(jinfo, 2, 0);
  res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~op_mask));
  res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~op_mask));
  r_tmp_lo = res_lo;
  r_tmp_hi = res_hi;
  // If a result reg aliases an operand, compute into a scratch instead.
  if (op_mask & (1<<r_tmp_lo)) r_tmp_lo = Thumb2_Tmp(jinfo, op_mask);
  if (op_mask & (1<<r_tmp_hi)) r_tmp_hi = Thumb2_Tmp(jinfo, op_mask|(1<<r_tmp_lo));
  umull(jinfo->codebuf, r_tmp_lo, r_tmp_hi, rho_lo, lho_lo);
  mla(jinfo->codebuf, r_tmp_hi, rho_lo, lho_hi, r_tmp_hi);
  mla(jinfo->codebuf, res_hi, rho_hi, lho_lo, r_tmp_hi);
  mov_reg(jinfo->codebuf, res_lo, r_tmp_lo);
}
+
+void Thumb2_fOp(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned rho, lho, res;
+
+  Thumb2_Fill(jinfo, 2);
+  rho = POP(jstack);
+  lho = POP(jstack);
+  Thumb2_Spill(jinfo, 1, 0);
+  res = PUSH(jstack, JSTACK_REG(jstack));
+  vmov_reg_s_toVFP(jinfo->codebuf, VFP_S0, lho);
+  vmov_reg_s_toVFP(jinfo->codebuf, VFP_S1, rho);
+  vop_reg_s(jinfo->codebuf, dOps[opc-opc_iadd], VFP_S0, VFP_S0, VFP_S1);
+  vmov_reg_s_toARM(jinfo->codebuf, res, VFP_S0);
+}
+
+void Thumb2_dOp(Thumb2_Info *jinfo, u32 opc)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned rho_lo, rho_hi, lho_lo, lho_hi, res_lo, res_hi;
+
+  Thumb2_Fill(jinfo, 4);
+  rho_lo = POP(jstack);
+  rho_hi = POP(jstack);
+  lho_lo = POP(jstack);
+  lho_hi = POP(jstack);
+  Thumb2_Spill(jinfo, 2, 0);
+  res_hi = PUSH(jstack, JSTACK_REG(jstack));
+  res_lo = PUSH(jstack, JSTACK_REG(jstack));
+  vmov_reg_d_toVFP(jinfo->codebuf, VFP_D0, lho_lo, lho_hi);
+  vmov_reg_d_toVFP(jinfo->codebuf, VFP_D1, rho_lo, rho_hi);
+  vop_reg_d(jinfo->codebuf, dOps[opc-opc_iadd], VFP_D0, VFP_D0, VFP_D1);
+  vmov_reg_d_toARM(jinfo->codebuf, res_lo, res_hi, VFP_D0);
+}
+
// Emit code that loads the opcode and bci into R0/R1 and then stores
// through address 0 to force a fault.  NOTE(review): this looks like a
// deliberate trap for unimplemented/bogus paths -- the `handler`
// parameter is unused here; confirm intent against the callers.
void Thumb2_Handler(Thumb2_Info *jinfo, unsigned handler, unsigned opcode, unsigned bci)
{
  mov_imm(jinfo->codebuf, ARM_R0, opcode);
  mov_imm(jinfo->codebuf, ARM_R1, bci);
  mov_imm(jinfo->codebuf, ARM_IP, 0);
  // str ip, [ip, #0] with ip == 0: guaranteed fault.
  str_imm(jinfo->codebuf, ARM_IP, ARM_IP, 0);
}
+
+void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start);
+
+// called from the SEGV handling code to see if a polling page read
+// is from a legitimate safepoint address
+int Thumb2_Install_Safepoint_PC(ucontext_t *uc, int magicByteOffset)
+{
+  mcontext_t *mc = &uc->uc_mcontext;
+  unsigned long arm_pc = mc->arm_pc;
+  // ensure the faulting instruction lies in JITted code
+  if (arm_pc < (unsigned long)(thumb2_codebuf + 1)) {
+    return false;
+  }
+  if (arm_pc >= (unsigned long)thumb2_codebuf->sp) {
+    return false;
+  }
+  // skip to the MAGIC word and check it is valid
+  arm_pc +=magicByteOffset;
+  if (Thumb2) {
+    if (*((short*)arm_pc) != (short)THUMB2_POLLING_PAGE_MAGIC) {
+      return false;
+    }
+    // skip the magic word 
+    arm_pc += 2;
+  } else {
+    if (*((unsigned*)arm_pc) != (unsigned)ARM_POLLING_PAGE_MAGIC) {
+      return false;
+    }
+    // skip the magic word 
+    arm_pc += 4;
+  }
+  mc->arm_pc = arm_pc;
+
+  return true;
+}
+
+// Insert code to poll the SafepointSynchronize state and call
+// Helper_SafePoint.
+// -- if offset is negative it identifies a bytecode index which
+// should be jumped to via an unconditional backward branch
+// taken either before or after executing the safepoint check
+// -- if offset is zero or positive then a return or conditional
+// branch, respectively, needs to be compiled so control should
+// flow to end of the safepoint check whether or not it is executed
+
void Thumb2_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci, int offset)
{
  // normal case: read the polling page and branch to skip
  // the safepoint test
  // abnormal case: read the polling page, trap to handler
  // which resets return address into the safepoint check code
  //
  // with a negative offset the generated code will look like
  //    movw r_tmp, #polling_page
  //    movt r_tmp, #polling_page
  //    ldr r_tmp, [r_tmp, #K] ; K == 2 * byte offset to the magic word
  //    b.n #branchtarget
  //    #POLLING_PAGE_MAGIC ; magic data word
  //    <
  //     safepoint check  code
  //    >
  //    b.n #branchtarget
  //
  // i.e. the generated code includes the branch backwards twice
  // and relies on a fault at the ldr to skip into the safepoint code
  //
  // with a zero or positive offset the caller will plant the return
  // (zero) or conditional branch (positive) code after the check so
  // the normal path skips round the safepoint check code and the
  // abnormal path just drops through. the generated code will look
  // like
  //
  //    movw r_tmp, #polling_page
  //    movt r_tmp, #polling_page
  //    ldr r_tmp, [r_tmp, #0]
  //    b.n L1
  //    POLLING_PAGE_MAGIC ; data
  //    <
  //     safepoint check  code
  //    >
  // L1:
  //    <caller plants branch/return here>
  //
  //  n.b. for a return there is no need save or restore locals

  bool is_return = offset == 0; // This is some kind of return bytecode

  int r_tmp = Thumb2_Tmp(jinfo, 0);
  unsigned dest;
  if (offset < 0) {
    // the index of the backward branch target in the code buffer
    dest = jinfo->bc_stackinfo[bci+offset] & ~BC_FLAGS_MASK;
  } else {
    dest = 0;
  }
  // Load the polling page address; the ldr below faults when a
  // safepoint is pending (page protected) and runs normally otherwise.
  mov_imm(jinfo->codebuf, r_tmp, (u32)os::get_polling_page());
  // this encodes the offset from the read instruction to the magic
  // word into the fault address, assuming it is 4 bytes. however, if
  // we need to plant a wide backwards branch we may need to rewrite
  // this instruction with offset 6. so stash the instruction location
  // here just in case. n.b. the offset is doubled to ensure the fault
  // address in aligned -- aligned reads always use a single 16-bit
  // instruction whereas non-aligned reads require 2 x 16 bit words
  unsigned read_loc = out_loc(jinfo->codebuf);
  unsigned loc_fwd_branch = 0;
  ldr_imm(jinfo->codebuf, r_tmp, r_tmp, Thumb2 ? 8 : 16);
  if (offset < 0) {
    branch_uncond(jinfo->codebuf, dest);
    if (Thumb2) {
      unsigned magic_loc = out_loc(jinfo->codebuf);
      if (magic_loc - read_loc != 4) {
	JASSERT(magic_loc - read_loc == 6, "bad safepoint offset to magic word");
	// must have needed a wide branch so patch the load instruction
	jinfo->codebuf->idx = read_loc >> 1;
	ldr_imm(jinfo->codebuf, r_tmp, r_tmp, 12);
	jinfo->codebuf->idx = magic_loc >> 1;
      }
    }
  } else {
    // leave space for the forward skip branch
    // location of branch instruction is read_loc + 2
    loc_fwd_branch = forward_short(jinfo->codebuf);
  }
  // now write a magic word after the branch so the signal handler can
  // test that a polling page read is kosher
  if (Thumb2)
    out_16(jinfo->codebuf, THUMB2_POLLING_PAGE_MAGIC);
  else
    out_32(jinfo->codebuf, ARM_POLLING_PAGE_MAGIC);

  {
    // Flush the stack to memory and save its register state.
    SAVE_STACK(jinfo->jstack);
    Thumb2_Flush(jinfo);

    // We don't save or restore locals if we're returning.
    if (! is_return)
      Thumb2_save_local_refs(jinfo, stackdepth);

    // now the safepoint polling code itself
    mov_imm(jinfo->codebuf, ARM_R1, bci+CONSTMETHOD_CODEOFFSET);
    add_imm(jinfo->codebuf, ARM_R2, ISTATE_REG(jinfo),
	    ISTATE_OFFSET(jinfo, stackdepth, 0));
    bl(jinfo->codebuf, handlers[H_SAFEPOINT]);

    if (! is_return)
      Thumb2_restore_local_refs(jinfo, stackdepth);

    RESTORE_STACK(jinfo->jstack, jinfo->codebuf);

    if (offset < 0) {
      // needs another unconditional backward branch
      branch_uncond(jinfo->codebuf, dest);
    } else {
      // patch in the forward skip branch
      branch_narrow_patch(jinfo->codebuf, loc_fwd_branch);
    }
  }
}
+
// If this is a backward branch (its target is already compiled),
// compile a safepoint check before it.
void Thumb2_Cond_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci) {
  int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
  unsigned dest_taken = bci + offset;

  if (jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
    // pass the offset as positive (negated, since a backward branch
    // has offset < 0) so the safepoint code plants a forward skip over
    // the test rather than doing an unconditional backwards branch.
    // that allows the condition test to be planted by whatever
    // followed this call
    Thumb2_Safepoint(jinfo, stackdepth, bci, -offset);
  }
}
+
// Compile a conditional branch at `bci`.  If the target is already
// compiled, emit the branch and return the fall-through bci; otherwise
// leave a patch slot, recursively compile from the fall-through, patch
// the branch afterwards, and return -1 (nothing left to compile here).
int Thumb2_Branch(Thumb2_Info *jinfo, unsigned bci, unsigned cond)
{
    int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
    unsigned dest_taken = bci + offset;
    unsigned dest_not_taken = bci + 3;
    unsigned loc;

    if (jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
      branch(jinfo->codebuf, cond, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
      return dest_not_taken;
    }
    loc = forward_long(jinfo->codebuf);
    Thumb2_codegen(jinfo, dest_not_taken);
    JASSERT(jinfo->bc_stackinfo[dest_taken] & BC_COMPILED, "dest in branch not compiled!!!");
    branch_patch(jinfo->codebuf, cond, loc, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
    return -1;
}
+
// Compile goto/goto_w at `bci`.  A backward goto (target compiled and
// stackdepth supplied) goes through the safepoint helper, which plants
// the branch itself; otherwise leave a patch slot, recursively compile
// from the fall-through, patch afterwards, and return -1.
int Thumb2_Goto(Thumb2_Info *jinfo, unsigned bci, int offset, int len, int stackdepth = -1)
{
    unsigned dest_taken = bci + offset;
    unsigned dest_not_taken = bci + len;
    unsigned loc;

    if (stackdepth >= 0
	&& jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
      // n.b. the backwards branch will be planted by the safepoint routine
      Thumb2_Safepoint(jinfo, stackdepth, bci, offset);
      return dest_not_taken;
    }
    loc = forward_long(jinfo->codebuf);
    Thumb2_codegen(jinfo, dest_not_taken);
    JASSERT(jinfo->bc_stackinfo[dest_taken] & BC_COMPILED, "dest in goto not compiled!!!");
    branch_uncond_patch(jinfo->codebuf, loc, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
    return -1;
}
+
+void Thumb2_save_local_refs(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+      if ((locals_info[i] & (1 << LOCAL_REF)) && (locals_info[i] & (1 << LOCAL_MODIFIED))) {
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+// Reload every register-cached reference local from memory — the
+// counterpart of Thumb2_save_local_refs, needed because a GC during
+// the intervening call may have relocated the oops.  Requires an
+// empty expression stack.
+void Thumb2_restore_local_refs(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+      if (locals_info[i] & (1<<LOCAL_REF)) {
+	load_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+// Write back every register-cached local that has been modified
+// (refs and non-refs alike), e.g. before exiting to the interpreter
+// or unwinding the frame.  Requires an empty expression stack.
+void Thumb2_save_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+      if (locals_info[i] & (1 << LOCAL_MODIFIED)) {
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+// Reload every register-cached local from the in-memory locals area,
+// unconditionally.  Counterpart of Thumb2_save_all_locals.  Requires
+// an empty expression stack.
+void Thumb2_restore_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {
+	load_local(jinfo, r, i, stackdepth);
+    }
+  }
+}
+
+// Bail out of compiled code through the given handler stub (e.g.
+// H_EXIT_TO_INTERPRETER, H_DEADCODE): flush the cached expression
+// stack, write back modified locals, pass the current code offset in
+// R0, and call the handler.
+void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth)
+{
+    Thumb2_Flush(jinfo);
+    Thumb2_save_all_locals(jinfo, stackdepth);
+    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+    bl(jinfo->codebuf, handlers[handler]);
+}
+
+// Compile one of the *return bytecodes.  Sequence: plant the
+// safepoint check; if the method saw monitorenter/exit bytecodes bail
+// to the interpreter; for synchronized methods release the method
+// monitor with an inline ldrex/strex fast path that falls back to the
+// H_SYNCHRONIZED_EXIT handler; then pop the return value, unwind the
+// zero frame and return to the caller.  The epilogue is emitted once
+// and shared: compiled_return / compiled_word_return[r] record its
+// code offset so later returns just branch to it.
+// NOTE(review): the forward_cb/forward_short branches are patched
+// below in strict reverse order of emission — do not reorder.
+void Thumb2_Return(Thumb2_Info *jinfo, unsigned opcode, int bci, int stackdepth)
+{
+  Symbol *name = jinfo->method->name();
+  Thumb2_Safepoint(jinfo, stackdepth, bci, 0);
+
+  Reg r_lo, r;
+  Thumb2_Stack *jstack = jinfo->jstack;
+
+  if (jinfo->method->has_monitor_bytecodes()) {
+    Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
+  }
+
+  if (jinfo->method->is_synchronized()) {
+    unsigned loc_success1, loc_success2, loc_failed, loc_retry, loc_exception;
+    unsigned loc_illegal_monitor_state;
+    Thumb2_Flush(jinfo);
+//    Thumb2_save_local_refs(jinfo);
+    // Free the monitor
+    //
+    // 		add	r1, #<stackdepth>-8
+    // 		ldr	r2, [r1, #4]
+    //		cbz	r2, throw_illegal_monitor_state
+    //		ldr	r0, [r1, #0]
+    //		mov	r3, #0
+    //		str	r3, [r1, #4]
+    //		cbz	r0, success
+    //	retry:
+    //		ldrex	r3, [r2, #0]
+    //		cmp	r1, r3
+    //		bne	failed
+    //		strex	r3, r0, [r2, #0]
+    //		cbz	r3, success
+    //		b	retry
+    //	failed:
+    //		str	r2, [r1, #4]
+    //		...
+    //  success:
+    //
+    // JAZ_V1 == tmp2
+    // JAZ_V2 == tmp1
+    // R1 = address of the method's monitor (BasicObjectLock) in the frame
+    add_imm(jinfo->codebuf, ARM_R1, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) - frame::interpreter_frame_monitor_size()*wordSize);
+
+    ldr_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4);
+    loc_illegal_monitor_state = forward_cb(jinfo->codebuf);
+    ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0);
+    mov_imm(jinfo->codebuf, ARM_R3, 0);
+    str_imm(jinfo->codebuf, ARM_R3, ARM_R1, 4);
+    loc_success1 = forward_cb(jinfo->codebuf);
+    loc_retry = out_loc(jinfo->codebuf);
+    ldrex_imm(jinfo->codebuf, ARM_R3, ARM_R2);
+    cmp_reg(jinfo->codebuf, ARM_R1, ARM_R3);
+    loc_failed = forward_short(jinfo->codebuf);
+    strex_imm(jinfo->codebuf, ARM_R3, ARM_R0, ARM_R2);
+    loc_success2 = forward_cb(jinfo->codebuf);
+    branch_uncond(jinfo->codebuf, loc_retry);
+    bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+    cbz_patch(jinfo->codebuf, ARM_R2, loc_illegal_monitor_state);
+    // failed / illegal-monitor-state: restore the header word and take
+    // the slow path through the runtime.
+    str_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4);
+    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
+    bl(jinfo->codebuf, handlers[H_SYNCHRONIZED_EXIT]);
+    loc_exception = forward_cb(jinfo->codebuf);
+    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_success1);
+    cbz_patch(jinfo->codebuf, ARM_R3, loc_success2);
+  }
+
+  if (opcode == opc_return) {
+    // void return: reuse the shared epilogue if one was already emitted.
+    if (jinfo->compiled_return) {
+      unsigned ret_idx = jinfo->compiled_return;
+
+      branch_uncond(jinfo->codebuf, ret_idx);
+      return;
+    }
+    jinfo->compiled_return = jinfo->codebuf->idx * 2;	// byte offset of this epilogue
+  } else {
+    if (opcode == opc_lreturn || opcode == opc_dreturn) {
+      Thumb2_Fill(jinfo, 2);
+      r_lo = POP(jstack);
+      r = POP(jstack);
+    } else {
+      Thumb2_Fill(jinfo, 1);
+      r = POP(jstack);
+      // One shared epilogue per result register for single-word returns.
+      if (jinfo->compiled_word_return[r]) {
+        unsigned ret_idx = jinfo->compiled_word_return[r];
+
+        branch_uncond(jinfo->codebuf, ret_idx);
+        return;
+      }
+      jinfo->compiled_word_return[r] = jinfo->codebuf->idx * 2;
+    }
+  }
+
+  // Unwind: clear last_Java_sp/fp, pop the zero frame, and push the
+  // result (if any) where the caller expects it, reclaiming the locals.
+  mov_imm(jinfo->codebuf, ARM_LR, 0);
+  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_LAST_JAVA_SP);
+  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_LAST_JAVA_FP);
+  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 0);
+
+  if (opcode == opc_return) {
+    add_imm(jinfo->codebuf, Rstack, Rstack, jinfo->method->max_locals() * sizeof(int) + 4);
+  } else {
+    if (opcode == opc_lreturn || opcode == opc_dreturn) {
+      str_imm(jinfo->codebuf, r, Rstack, jinfo->method->max_locals() * sizeof(int));
+      str_imm_wb(jinfo->codebuf, r_lo, Rstack, jinfo->method->max_locals() * sizeof(int)-4, 1, 1);
+    } else {
+      str_imm_wb(jinfo->codebuf, r, Rstack, jinfo->method->max_locals() * sizeof(int), 1, 1);
+    }
+  }
+
+  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_TOP_ZERO_FRAME);
+  str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+
+  // deoptimized_frames = 0
+  // FIXME: This should be done in the slow entry, but only three
+  // words are allocated there for the instructions.
+  mov_imm(jinfo->codebuf, ARM_R0, 0);
+
+  ldm(jinfo->codebuf, C_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+}
+
+// Try to compile the method as a trivial field accessor
+// (aload_0; getfield; ireturn/areturn).  Emits a frameless stub: the
+// slow entry saves Rthread/LR and tail-calls the fast entry, which
+// replaces the receiver on the Java stack with the field value,
+// honouring the field's type (byte/char/short/word loads) and
+// inserting a barrier for volatile fields.  Returns 1 on success, or
+// 0 if the getfield's constant-pool entry is not yet resolved (the
+// caller must then compile the method normally).
+int Thumb2_Accessor(Thumb2_Info *jinfo)
+{
+  jubyte *code_base = jinfo->code_base;
+  constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+  ConstantPoolCacheEntry* cache;
+  int index = GET_NATIVE_U2(code_base+2);
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  unsigned slow_entry;
+
+  JASSERT(code_base[0] == opc_aload_0 || code_base[0] == opc_iaccess_0, "not an aload_0 in accessor");
+  JASSERT(code_base[4] == opc_ireturn || code_base[4] == opc_areturn, "not an ireturn in accessor");
+  cache = cp->entry_at(index);
+  if (!cache->is_resolved((Bytecodes::Code)opc_getfield)) return 0;
+
+  TosState tos_type = cache->flag_state();
+  int field_offset = cache->f2_as_index();
+
+  // Slow entry point - callee save
+  // R0 = method
+  // R2 = thread
+  slow_entry = out_pos(jinfo->codebuf);
+  stm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, slow_entry + FAST_ENTRY_OFFSET);
+  ldm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_align(jinfo->codebuf, sizeof(unsigned));
+
+  // Metadata words between slow and fast entry (same layout as
+  // Thumb2_Enter emits for normal methods).
+  out_32(jinfo->codebuf, 0);	// pointer to osr table
+  out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
+  out_32(jinfo->codebuf, 0);	// next compiled method
+
+  out_32(jinfo->codebuf, -1);    // regusage
+  out_32(jinfo->codebuf, -1);
+  out_32(jinfo->codebuf, -1);
+
+  out_align(jinfo->codebuf, CODE_ALIGN);
+
+  // fast entry point
+  bc_stackinfo[0] = (bc_stackinfo[0] & BC_FLAGS_MASK) | (jinfo->codebuf->idx * 2) | BC_COMPILED;
+  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_JAVA_SP);
+  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0);	// receiver from stack top
+  if (tos_type == btos)
+    ldrsb_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  else if (tos_type == ctos)
+    ldrh_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  else if (tos_type == stos)
+    ldrsh_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  else
+    ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset);
+  str_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0);	// overwrite receiver with result
+
+  if (cache->is_volatile())
+    fullBarrier(jinfo->codebuf);
+
+  // deoptimized_frames = 0
+  mov_imm(jinfo->codebuf, ARM_R0, 0);
+  mov_reg(jinfo->codebuf, ARM_PC, ARM_LR);
+
+  return 1;
+}
+
+// Actual operand-stack depth in words for a bytecode: the depth
+// recorded in the low bits of its stackinfo entry, plus the monitor
+// slot that synchronized methods keep below the expression stack.
+#define STACKDEPTH(jinfo, stackinfo) (((stackinfo) & ~BC_FLAGS_MASK) + \
+	((jinfo)->method->is_synchronized() ? frame::interpreter_frame_monitor_size() : 0))
+
+
+// Emit the method prologue: the callee-save slow entry, the metadata
+// words (osr table, exception table, next-method link, regusage), and
+// the fast entry which performs the stack-overflow check, zeroes the
+// reference/untyped extra locals, builds the interpreter istate frame,
+// acquires the method monitor for synchronized methods (inline
+// ldrex/strex fast path, H_SYNCHRONIZED_ENTER on contention), and
+// finally loads register-allocated parameters.
+void Thumb2_Enter(Thumb2_Info *jinfo)
+{
+  int parms = jinfo->method->size_of_parameters();
+  int extra_locals = jinfo->method->max_locals() - parms;
+  unsigned *locals_info = jinfo->locals_info;
+  int i;
+  unsigned stackdepth = 0;
+  unsigned slow_entry;
+  Symbol *name = jinfo->method->name();
+
+  // Slow entry point - callee save
+  // R0 = method
+  // R2 = thread
+  slow_entry = out_pos(jinfo->codebuf);
+  stm(jinfo->codebuf, I_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, slow_entry + FAST_ENTRY_OFFSET);
+  ldm(jinfo->codebuf, I_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_align(jinfo->codebuf, sizeof(unsigned));
+
+  out_32(jinfo->codebuf, 0);	// Space for osr_table pointer
+  out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
+  out_32(jinfo->codebuf, 0);	// Pointer to next method
+
+  out_32(jinfo->codebuf, 0);    // regusage
+  out_32(jinfo->codebuf, 0);
+  out_32(jinfo->codebuf, 0);
+
+  out_align(jinfo->codebuf, CODE_ALIGN);
+
+  // Fast entry point == Slow entry + 64 - caller save
+  // R0 = method
+  // R2 = thread
+  stm(jinfo->codebuf, C_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+  {
+    unsigned stacksize;
+
+    // Check for Java stack overflow.  Leaf methods that fit in
+    // LEAF_STACK_SIZE skip the check entirely.
+    stacksize = (extra_locals + jinfo->method->max_stack()) * sizeof(int);
+    stacksize += FRAME_SIZE + STACK_SPARE;
+    if (!jinfo->is_leaf || stacksize > LEAF_STACK_SIZE) {
+      ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_STACK_BASE);
+      sub_imm(jinfo->codebuf, ARM_R1, Rstack, stacksize + LEAF_STACK_SIZE);
+      cmp_reg(jinfo->codebuf, ARM_R3, ARM_R1);
+      bl(jinfo->codebuf, handlers[H_STACK_OVERFLOW], COND_CS);
+    }
+  }
+  mov_imm(jinfo->codebuf, ARM_R1, 0);	// R1 = 0, reused as the zero value below
+
+  if (extra_locals > 0) {
+    sub_imm(jinfo->codebuf, Rstack, Rstack, extra_locals * 4);
+
+    // Zero locals that hold references or whose type is unknown, so
+    // the GC never sees stale data.
+    for (i = 0; i < extra_locals; i++) {
+      unsigned linfo = locals_info[parms+i];
+      if (linfo & (1<< LOCAL_REF) || ((linfo >> LOCAL_INT) & 0x1f) == 0)
+	str_imm(jinfo->codebuf, ARM_R1, Rstack, (extra_locals-1 - i) * 4);
+    }
+  }
+
+  ldr_imm(jinfo->codebuf, ARM_IP, ARM_R0, METHOD_CONSTMETHOD);
+  ldr_imm(jinfo->codebuf, ARM_IP, ARM_IP, METHOD_CONSTANTS);	// IP = constant pool
+
+  add_imm(jinfo->codebuf, Rlocals, Rstack, (jinfo->method->max_locals()-1) * sizeof(int));
+
+  sub_imm(jinfo->codebuf, Rstack, Rstack, FRAME_SIZE);
+
+  // Build the interpreter state (istate) frame.
+  if (jinfo->use_istate) mov_reg(jinfo->codebuf, Ristate, Rstack);
+  store_istate(jinfo, Rstack, ISTATE_SELF_LINK, stackdepth);
+
+  store_istate(jinfo, Rstack, ISTATE_MONITOR_BASE, stackdepth);
+
+  store_istate(jinfo, Rlocals, ISTATE_LOCALS, stackdepth);
+
+  if (jinfo->method->is_synchronized()) {
+    // Reserve the monitor slot and stash the lock object (the Class
+    // mirror for static methods, else the receiver) in JAZ_V1.
+    sub_imm(jinfo->codebuf, Rstack, Rstack, frame::interpreter_frame_monitor_size()*wordSize);
+    stackdepth = frame::interpreter_frame_monitor_size();
+    if (jinfo->method->is_static()) {
+      ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_POOL_HOLDER);
+      ldr_imm(jinfo->codebuf, JAZ_V1, ARM_R3, KLASS_PART+KLASS_JAVA_MIRROR);
+    } else {
+      ldr_imm(jinfo->codebuf, JAZ_V1, Rlocals, 0);
+    }
+    str_imm(jinfo->codebuf, JAZ_V1, Rstack, 4);
+  }
+
+  store_istate(jinfo, ARM_R1, ISTATE_MSG, stackdepth);
+  store_istate(jinfo, ARM_R1, ISTATE_OOP_TEMP, stackdepth);
+
+  sub_imm(jinfo->codebuf, ARM_R3, Rstack, jinfo->method->max_stack() * sizeof(int));
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP);
+
+  store_istate(jinfo, Rstack, ISTATE_STACK_BASE, stackdepth);
+
+  sub_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4);
+  store_istate(jinfo, ARM_R3, ISTATE_STACK_LIMIT, stackdepth);
+
+  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME);
+  store_istate(jinfo, ARM_R3, ISTATE_NEXT_FRAME, stackdepth);
+
+  mov_imm(jinfo->codebuf, ARM_R3, INTERPRETER_FRAME);
+  store_istate(jinfo, ARM_R3, ISTATE_FRAME_TYPE, stackdepth);
+
+  mov_imm(jinfo->codebuf, ARM_R1, 0);   // set last SP to zero before
+                                        // setting FP
+  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP);
+  add_imm(jinfo->codebuf, ARM_R3, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) + ISTATE_NEXT_FRAME);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_FP);
+  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_SP);
+
+  ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_CACHE);
+  store_istate(jinfo, ARM_R3, ISTATE_CONSTANTS, stackdepth);
+
+  store_istate(jinfo, Rthread, ISTATE_THREAD, stackdepth);
+  store_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
+
+  if (jinfo->method->is_synchronized()) {
+    unsigned loc_retry, loc_failed, loc_success, loc_exception;
+
+    // JAZ_V1 == monitor object
+    //
+    // Try to acquire the monitor. Seems very sub-optimal
+    // 		ldr	r3, [JAZ_V1, #0]
+    // 		orr	r3, r3, #1
+    // 		str	r3, [Rstack, #0]
+    // 	retry:
+    // 		ldrex	r0, [JAZ_V1, #0]
+    // 		cmp	r3, r0
+    // 		bne	failed
+    // 		strex	r0, Rstack, [JAZ_V1, #0]
+    // 		cbz	r0, success
+    // 		b	retry
+    // 	failed:
+    // 		<failed - someone else has the monitor - must yield>
+    //  success:
+    // 		<success - acquired the monitor>
+    //
+    ldr_imm(jinfo->codebuf, ARM_R3, JAZ_V1, 0);
+    orr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 1);
+    str_imm(jinfo->codebuf, ARM_R3, Rstack, 0);
+    loc_retry = out_loc(jinfo->codebuf);
+// retry:
+    ldrex_imm(jinfo->codebuf, ARM_R0, JAZ_V1);
+    cmp_reg(jinfo->codebuf, ARM_R3, ARM_R0);
+    loc_failed = forward_short(jinfo->codebuf);
+    strex_imm(jinfo->codebuf, ARM_R0, Rstack, JAZ_V1);
+    loc_success = forward_cb(jinfo->codebuf);
+    branch_uncond(jinfo->codebuf, loc_retry);
+    bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+// failed:
+    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
+    bl(jinfo->codebuf, handlers[H_SYNCHRONIZED_ENTER]);
+    loc_exception = forward_cb(jinfo->codebuf);
+    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
+    cbz_patch(jinfo->codebuf, ARM_R0, loc_success);
+// success:
+
+  }
+
+  {
+    // Populate register-allocated locals: parameters are loaded from
+    // memory; other ref locals start as null (R1 is still zero here).
+    int nlocals = jinfo->method->max_locals();
+
+    for (i = 0; i < nlocals; i++) {
+      Reg r = jinfo->jregs->r_local[i];
+      if (r) {
+	unsigned stackdepth = STACKDEPTH(jinfo, 0);
+        if (i < parms)
+	  load_local(jinfo, r, i, stackdepth);
+        else if (locals_info[i] & (1<<LOCAL_REF))
+          mov_reg(jinfo->codebuf, r, ARM_R1);
+      }
+    }
+  }
+}
+
+// Maps arithmetic/conversion bytecodes (indexed from opc_idiv, see
+// OPCODE2HANDLER below) to their out-of-line handler stubs.  A zero
+// entry means the operation is compiled inline instead.
+unsigned opcode2handler[] = {
+  H_IDIV,
+  H_LDIV,
+  0, 0,			// fdiv, ddiv
+  H_IREM,
+  H_LREM,
+  H_FREM,
+  H_DREM,
+  0, 0, 0, 0,		// ineg, lneg, fneg, dneg
+  0, 0, 0, 0, 0, 0,	// shifts
+  0, 0, 0, 0, 0, 0,	// and, or, xor
+  0,			// iinc
+  0,			// i2l
+  H_I2F,
+  H_I2D,
+  0,			// l2i
+  H_L2F,
+  H_L2D,
+  H_F2I,
+  H_F2L,
+  H_F2D,
+  H_D2I,
+  H_D2L,
+  H_D2F,
+};
+
+// Generate code for a load of a jlong.
+
+// Generate code for a load of a jlong.
+// For a volatile long on MP, ldrexd gives the single-copy-atomic
+// 64-bit read the JMM requires; otherwise a plain ldrd suffices.
+void Thumb2_load_long(Thumb2_Info *jinfo, Reg r_lo, Reg r_hi, Reg base,
+		      int field_offset,
+		      bool is_volatile = false)
+{
+  Thumb2_Stack *jstack = jinfo->jstack;
+  CodeBuf *codebuf = jinfo->codebuf;
+  if (is_volatile && os::is_MP()) {
+    Reg r_addr = base;
+    if (field_offset) {
+      // ldrexd takes no immediate offset: form the address in a scratch reg.
+      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
+      add_imm(codebuf, r_addr, base, field_offset);
+    }
+    ldrexd(codebuf, r_lo, r_hi, r_addr);
+  } else {
+    LDRD_PRE(jstack, r_lo, r_hi);
+    ldrd_imm(codebuf, r_lo, r_hi, base, field_offset);
+  }
+}
+
+// Generate code for a store of a jlong.  If the operand is volatile,
+// generate a sequence of the form
+//
+// .Ldst
+// 	ldrexd 	r2, r3, [dst]
+// 	strexd 	r2, r0, r1, [dst]
+// 	cmp 	r2, #0
+// 	bne 	.Ldst
+//
+// (the ldrexd/strexd loop makes the 64-bit store single-copy atomic,
+// as the JMM requires for volatile longs; otherwise a plain strd).
+void Thumb2_store_long(Thumb2_Info *jinfo, Reg r_lo, Reg r_hi, Reg base,
+		      int field_offset,
+		      bool is_volatile = false)
+{
+  CodeBuf *codebuf = jinfo->codebuf;
+  if (is_volatile && os::is_MP()) {
+    Reg r_addr = base;
+    Reg tmp1 = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
+    Reg tmp2 = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base) | (1<<tmp1));
+    if (field_offset) {
+      // strexd takes no immediate offset: form the address in a scratch reg.
+      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base) | (1<<tmp1) | (1<<tmp2));
+      add_imm(jinfo->codebuf, r_addr, base, field_offset);
+    }
+    int loc = out_loc(codebuf);
+    ldrexd(codebuf, tmp1, tmp2, r_addr);
+    strexd(codebuf, tmp1, r_lo, r_hi, r_addr);
+    cmp_imm(codebuf, tmp1, 0);
+    branch(codebuf, COND_NE, loc);	// retry until strexd succeeds
+  } else {
+    strd_imm(codebuf, r_lo, r_hi, base, field_offset);
+  }
+}
+
+// Look up the handler stub for an arithmetic/conversion opcode (see
+// the opcode2handler[] table above).
+#define OPCODE2HANDLER(opc) (handlers[opcode2handler[(opc)-opc_idiv]])
+
+// Mangled symbol for InterpreterRuntime::register_finalizer(JavaThread*, oopDesc*).
+extern "C" void _ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc(void);
+
+// Push VFP_REG to the java stack.
+// Push VFP_REG to the java stack.
+// Allocates two stack registers (hi word first) and moves the double
+// out of the VFP register into that ARM register pair.
+static void vfp_to_jstack(Thumb2_Info *jinfo, int vfp_reg) {
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+  vmov_reg_d_toARM(jinfo->codebuf, r_lo, r_hi, vfp_reg);
+}
+
+// Pop the java stack to VFP_REG .
+static void jstack_to_vfp(Thumb2_Info *jinfo, int vfp_reg) {
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned r_lo, r_hi;
+  Thumb2_Fill(jinfo, 2);
+  r_lo = POP(jstack);
+  r_hi = POP(jstack);
+  vmov_reg_d_toVFP(jinfo->codebuf, vfp_reg, r_lo, r_hi);
+  Thumb2_Flush(jinfo);
+}
+
+// Expand a call to a "special" method.  These are usually inlines of
+// java.lang.Math methods.  Return true if the inlining succeeded.
+// Expand a call to a "special" method.  These are usually inlines of
+// java.lang.Math methods.  Return true if the inlining succeeded.
+// Handles: dabs inline; sin/cos/tan/log/log10 as direct calls into
+// the JNI StrictMath natives (resolved via dlsym); sqrt with a NaN
+// canonicalization fallback; and Unsafe compareAndSwapInt/Long as
+// inline ldrex/strex loops.  Returning false makes the caller compile
+// an ordinary invoke instead.
+static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
+				  unsigned stackdepth) {
+  Thumb2_Stack *jstack = jinfo->jstack;
+  CodeBuf *codebuf = jinfo->codebuf;
+
+  const char *entry_name;
+
+  switch (callee->intrinsic_id()) {
+  case vmIntrinsics::_dabs:
+   {
+     Thumb2_dAbs(jinfo);
+     return true;
+    }
+
+#ifdef __ARM_PCS_VFP
+  case vmIntrinsics::_dsin:
+    entry_name = "Java_java_lang_StrictMath_sin";
+    break;
+
+  case vmIntrinsics::_dcos:
+    entry_name = "Java_java_lang_StrictMath_cos";
+    break;
+
+  case vmIntrinsics::_dtan:
+    entry_name = "Java_java_lang_StrictMath_tan";
+    break;
+
+  case vmIntrinsics::_dsqrt:
+    {
+      void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
+      if (! entry_point)
+	return false;
+
+      unsigned r_lo, r_hi, r_res_lo, r_res_hi;
+
+      // Make sure that canonical NaNs are returned, as per the spec.
+      //
+      // Generate:
+      // vsqrt.f64 d0, d1
+      // vcmp.f64 d0, d0
+      // vmrs APSR_nzcv, fpscr
+      // beq.n 0f
+      // vmov.f64 d0, d1
+      // blx Java_java_lang_StrictMath_sqrt
+      // 0:
+      jstack_to_vfp(jinfo, VFP_D1);
+      vop_reg_d(jinfo->codebuf, VP_SQRT, VFP_D0, 0, VFP_D1);
+      vcmp_reg_d(jinfo->codebuf, VFP_D0, VFP_D0, 0);	// NaN iff result != itself
+      vmrs(jinfo->codebuf, ARM_PC);
+      int loc = forward_short(jinfo->codebuf);
+      vmov_reg_d_VFP_to_VFP(jinfo->codebuf, VFP_D0, VFP_D1);
+      // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
+      // parameter, so it's arguably pointless to pass it here.
+      add_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_JNI_ENVIRONMENT);
+      mov_imm(jinfo->codebuf, ARM_IP, (unsigned)entry_point);
+      blx_reg(jinfo->codebuf, ARM_IP);
+      bcc_patch(jinfo->codebuf, COND_EQ, loc);
+      vfp_to_jstack(jinfo, VFP_D0);
+
+      return true;
+    }
+
+  case vmIntrinsics::_dlog:
+    entry_name = "Java_java_lang_StrictMath_log";
+    break;
+
+  case vmIntrinsics::_dlog10:
+    entry_name = "Java_java_lang_StrictMath_log10";
+    break;
+#endif // __ARM_PCS_VFP
+
+  case vmIntrinsics::_compareAndSwapInt:
+   {
+      Thumb2_Fill(jinfo, 4);
+
+      // Java stack: ..., unsafe, object, offset(long), expect, update
+      unsigned update = POP(jstack);
+      unsigned expect = POP(jstack);
+      unsigned offset = POP(jstack);
+      POP(jstack);  // Actually the high part of the offset
+
+      // unsigned object = POP(jstack);
+      // unsigned unsafe = POP(jstack);  // Initially an instance of java.lang.Unsafe
+
+      Thumb2_Flush(jinfo);
+      // Get ourself a result reg that's not one of the inputs
+      unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
+      unsigned result = JSTACK_PREFER(jstack, ~exclude);
+
+      ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
+      add_reg(codebuf, result, offset, ARM_IP); // result now points to word
+      ldr_imm(codebuf, ARM_LR, ARM_LR, 0);  // Security check
+
+      fullBarrier(codebuf);
+
+      // ldrex/strex CAS loop; on mismatch fall through with NE set.
+      int retry = out_loc(codebuf);
+      ldrex_imm(codebuf, ARM_LR, result);
+      cmp_reg(codebuf, ARM_LR, expect);
+      int loc_failed = forward_short(codebuf);
+      strex_imm(codebuf, ARM_IP, update, result);
+      cmp_imm(codebuf, ARM_IP, 0);
+      branch(codebuf, COND_NE, retry);
+      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
+      // result = (EQ ? 1 : 0); ARM mode uses conditional movs instead of IT.
+      if (Thumb2) {
+	it(codebuf, COND_NE, IT_MASK_TE);
+	mov_imm(codebuf, result, 0);
+	mov_imm(codebuf, result, 1);
+      } else {
+	mov_imm(codebuf, result, 0, COND_NE);
+	mov_imm(codebuf, result, 1, COND_EQ);
+      }
+      fullBarrier(codebuf);
+
+      PUSH(jstack, result);
+    }
+    return true;
+
+  case vmIntrinsics::_compareAndSwapLong:
+    {
+      // Arch < V6K lacks ldrexd/strexd
+      if (!ARCH_GE_V6K(CPUInfo)) return false;
+
+      Thumb2_Fill(jinfo, 4);
+
+      unsigned update_lo = POP(jstack);
+      unsigned update_hi = POP(jstack);
+      unsigned expect_lo = POP(jstack);
+      unsigned expect_hi = POP(jstack);
+
+      Thumb2_Flush(jinfo);
+      Thumb2_save_all_locals(jinfo, stackdepth - 4); // 4 args popped above
+
+      // ldrexd/strexd can only take an even pair of registers in ARM mode
+      if (!Thumb2) {
+        if (update_hi != update_lo + 1 || (update_lo & 1)) {
+          mov_reg(codebuf, JAZ_V4, update_lo);
+	  mov_reg(codebuf, JAZ_V5, update_hi);
+          update_lo = JAZ_V4;
+          update_hi = JAZ_V5;
+        }
+      }
+
+      // instance of java.lang.Unsafe:
+      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize);
+      ldr_imm(codebuf, ARM_LR, ARM_LR, 0);  // Security check
+
+      // Object:
+      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize);
+      // Offset:
+      ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize);
+      add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
+
+      fullBarrier(codebuf);
+
+      // 64-bit ldrexd/strexd CAS loop, old value in JAZ_V2:JAZ_V3.
+      int retry = out_loc(codebuf);
+      ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
+      cmp_reg(codebuf, JAZ_V2, expect_lo);
+      cmp_reg(codebuf, JAZ_V3, expect_hi, COND_EQ); 
+      int loc_failed = forward_short(codebuf);
+      strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
+      cmp_imm(codebuf, JAZ_V1, 0);
+      branch(codebuf, COND_NE, retry);
+      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
+      unsigned result = JSTACK_REG(jinfo->jstack);
+
+      if (Thumb2) {
+	it(codebuf, COND_NE, IT_MASK_TE);
+	mov_imm(codebuf, result, 0);
+	mov_imm(codebuf, result, 1);
+      } else {
+	mov_imm(codebuf, result, 0, COND_NE);
+	mov_imm(codebuf, result, 1, COND_EQ);
+      }
+      fullBarrier(codebuf);
+
+      Thumb2_restore_all_locals(jinfo, stackdepth - 4); // 4 args popped above
+      add_imm(codebuf, Rstack, Rstack, 4 * wordSize);	// drop the 4 memory args
+      PUSH(jstack, result);
+    }
+    return true;
+
+  default:
+    return false;
+  }
+
+  // Common path for the StrictMath natives selected above: D0 = arg,
+  // call through IP, result comes back in D0 (hard-float ABI).
+  void *entry_point = dlsym(NULL, entry_name);
+  if (! entry_point)
+    return false;
+
+  jstack_to_vfp(jinfo, VFP_D0);
+  // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
+  // parameter, so it's arguably pointless to pass it here.
+  add_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_JNI_ENVIRONMENT);
+  mov_imm(jinfo->codebuf, ARM_IP, (unsigned)entry_point);
+  blx_reg(jinfo->codebuf, ARM_IP);
+  vfp_to_jstack(jinfo, VFP_D0);
+
+  return true;
+}
+
+void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
+{
+  JDEBUG_ (
+  Symbol *name = jinfo->method->name();
+  Symbol *sig = jinfo->method->signature();
+  );
+  unsigned code_size = jinfo->code_size;
+  jubyte *code_base = jinfo->code_base;
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  CodeBuf *codebuf = jinfo->codebuf;
+  Thumb2_Stack *jstack = jinfo->jstack;
+  unsigned bci;
+  unsigned opcode;
+  unsigned stackinfo;
+  int len;
+  unsigned stackdepth;
+
+  for (bci = start; bci < code_size; ) {
+    opcode = code_base[bci];
+    stackinfo = bc_stackinfo[bci];
+    unsigned start_idx;
+
+    if (stackinfo & BC_BRANCH_TARGET) Thumb2_Flush(jinfo);
+
+    if (!OSPACE && (stackinfo & BC_BACK_TARGET)) {
+      if (out_pos(codebuf) & 0x02) nop_16(codebuf);
+      if (out_pos(codebuf) & 0x04) nop_32(codebuf);
+    }
+
+    start_idx = jinfo->codebuf->idx;
+    if (START_BCI(start_idx) == -1) SET_START_BCI(start_idx, bci);
+
+    JASSERT(!(stackinfo & BC_COMPILED), "code already compiled for this bytecode?");
+    stackdepth = STACKDEPTH(jinfo, stackinfo); // Stackdepth here is adjusted for monitors
+    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2) | BC_COMPILED;
+
+    if (opcode > OPC_LAST_JAVA_OP)
+      switch (opcode) {
+      default:
+	if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	  opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	break;
+      case opc_return_register_finalizer:
+      case opc_fast_aldc_w:
+      case opc_fast_aldc:
+	break;
+      }
+
+    len = Bytecodes::length_for((Bytecodes::Code)opcode);
+    if (len <= 0) {
+      Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+      len = (Bytecodes::special_length_at
+	     (code,
+	      (address)(code_base+bci), (address)(code_base+code_size)));
+    }
+
+    if (IS_DEAD(stackinfo)) {
+      unsigned zlen = 0;
+      unsigned s_bci = bci;
+
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
+      do {
+	zlen += len;
+	bci += len;
+	if (bci >= code_size) break;
+	opcode = code_base[bci];
+	stackinfo = bc_stackinfo[bci];
+
+	if (stackinfo & BC_BRANCH_TARGET) break;
+	if (!IS_DEAD(stackinfo)) break;
+
+	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
+
+	if (opcode > OPC_LAST_JAVA_OP) {
+	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	}
+
+	len = Bytecodes::length_for((Bytecodes::Code)opcode);
+	if (len <= 0) {
+	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	  len = (Bytecodes::special_length_at
+		 (code,
+		  (address)(code_base+bci), (address)(code_base+code_size)));
+	}
+
+      } while (1);
+      SET_END_BCI(start_idx, s_bci + zlen);
+      continue;
+    }
+
+#if 0
+    if (bci >= 4) {
+      unsigned zlen = 0;
+      unsigned s_bci = bci;
+
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
+      do {
+	zlen += len;
+	bci += len;
+	if (bci >= code_size) break;
+	opcode = code_base[bci];
+	stackinfo = bc_stackinfo[bci];
+
+	if (stackinfo & BC_BRANCH_TARGET) break;
+
+	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
+
+	if (opcode > OPC_LAST_JAVA_OP) {
+	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
+	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
+	}
+
+	len = Bytecodes::length_for((Bytecodes::Code)opcode);
+	if (len <= 0) {
+	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	  len = (Bytecodes::special_length_at
+		 (code,
+		  (address)(code_base+bci), (address)(code_base+code_size)));
+	}
+
+      } while (1);
+      SET_END_BCI(start_idx, s_bci + zlen);
+      continue;
+    }
+#endif
+
+    SET_END_BCI(start_idx, bci + len);
+
+#ifdef THUMB2_JVMTI
+    // emit a start address --> bci map entry before
+    // generating machine code for this bytecode
+
+    void *addr = (void *)(codebuf->codebuf + codebuf->idx);
+    address_bci_map_add(addr, bci);
+#endif //THUMB2_JVMTI
+
+    switch (opcode) {
+      case opc_nop:
+	break;
+      case opc_aconst_null:
+	len += Thumb2_Imm(jinfo, 0, bci+1);
+	break;
+      case opc_iconst_m1:
+      case opc_iconst_0:
+      case opc_iconst_1:
+      case opc_iconst_2:
+      case opc_iconst_3:
+      case opc_iconst_4:
+      case opc_iconst_5:
+	len += Thumb2_Imm(jinfo, opcode - (unsigned)opc_iconst_0, bci+1);
+	break;
+      case opc_lconst_0:
+      case opc_lconst_1:
+	Thumb2_ImmX2(jinfo, opcode - (unsigned)opc_lconst_0, 0);
+	break;
+      case opc_fconst_0:
+      case opc_fconst_1:
+      case opc_fconst_2: {
+	unsigned v = 0;
+	if (opcode == (unsigned)opc_fconst_1) v = 0x3f800000;
+	if (opcode == (unsigned)opc_fconst_2) v = 0x40000000;
+	len += Thumb2_Imm(jinfo, v, bci+1);
+	break;
+      }
+      case opc_dconst_0:
+      case opc_dconst_1: {
+	unsigned v_hi = 0;
+	if (opcode == (unsigned)opc_dconst_1) v_hi = 0x3ff00000;
+	Thumb2_ImmX2(jinfo, 0, v_hi);
+	break;
+      }
+      case opc_bipush:
+	len += Thumb2_Imm(jinfo, GET_JAVA_S1(code_base+bci+1), bci+2);
+	break;
+      case opc_sipush:
+	len += Thumb2_Imm(jinfo, GET_JAVA_S2(code_base+bci+1), bci+3);
+	break;
+      case opc_ldc:
+      case opc_ldc_w:
+      case opc_ldc2_w: {
+	unsigned index = (opcode == (unsigned)opc_ldc) ?
+				code_base[bci+1] : GET_JAVA_U2(code_base+bci+1);
+	constantPoolOop constants = jinfo->method->constants();
+	unsigned v;
+
+	switch (v = constants->tag_at(index).value()) {
+	  case JVM_CONSTANT_Integer:
+	  case JVM_CONSTANT_Float:
+	    v = (unsigned)constants->int_at(index);
+	    len += Thumb2_Imm(jinfo, v, bci+len);
+	    break;
+	  case JVM_CONSTANT_Long:
+	  case JVM_CONSTANT_Double: {
+	    unsigned long long v;
+	    v = constants->long_at(index);
+	    Thumb2_ImmX2(jinfo, v & 0xffffffff, v >> 32);
+	    break;
+	  }
+	  case JVM_CONSTANT_Class:
+	  case JVM_CONSTANT_String: {
+	    Reg r;
+	    Thumb2_Spill(jinfo, 1, 0);
+	    r = JSTACK_REG(jstack);
+	    PUSH(jstack, r);
+	    load_istate(jinfo, r, ISTATE_METHOD, stackdepth+1);
+	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTMETHOD);
+	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTANTS);
+	    ldr_imm(jinfo->codebuf, r, r, CONSTANTPOOL_BASE + (index << 2));
+	    if (v == JVM_CONSTANT_Class)
+	      ldr_imm(jinfo->codebuf, r, r, KLASS_PART+KLASS_JAVA_MIRROR);
+	    break;
+	  }
+	  default:
+	    unsigned loc;
+
+	    JASSERT(opcode != opc_ldc2_w, "ldc2_w unresolved?");
+	    Thumb2_Flush(jinfo);
+	    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+//	    mov_imm(jinfo->codebuf, ARM_R1, opcode != opc_ldc);
+	    bl(jinfo->codebuf, handlers[opcode == opc_ldc ? H_LDC : H_LDC_W]);
+	  Thumb2_restore_local_refs(jinfo, stackdepth);
+	    ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	    mov_imm(jinfo->codebuf, ARM_R2, 0);
+	    str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	    loc = forward_cb(jinfo->codebuf);
+	    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
+	    cbnz_patch(jinfo->codebuf, ARM_R0, loc);
+	    PUSH(jstack, ARM_R0);
+	    break;
+	}
+	break;
+      }
+
+      case opc_iload:
+      case opc_fload:
+      case opc_aload:
+	Thumb2_Load(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_lload:
+      case opc_dload:
+	Thumb2_LoadX2(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_iload_0:
+      case opc_iload_1:
+      case opc_iload_2:
+      case opc_iload_3:
+      case opc_fload_0:
+      case opc_fload_1:
+      case opc_fload_2:
+      case opc_fload_3:
+      case opc_aload_0:
+      case opc_aload_1:
+      case opc_aload_2:
+      case opc_aload_3:
+	Thumb2_Load(jinfo, (opcode - opc_iload_0) & 3, stackdepth);
+	break;
+      case opc_lload_0:
+      case opc_lload_1:
+      case opc_lload_2:
+      case opc_lload_3:
+      case opc_dload_0:
+      case opc_dload_1:
+      case opc_dload_2:
+      case opc_dload_3:
+	Thumb2_LoadX2(jinfo, (opcode - opc_iload_0) & 3, stackdepth);
+	break;
+      case opc_iaload:
+      case opc_faload:
+      case opc_aaload:
+      case opc_baload:
+      case opc_caload:
+      case opc_saload:
+	Thumb2_Xaload(jinfo, opcode);
+	break;
+      case opc_laload:
+      case opc_daload:
+	Thumb2_X2aload(jinfo);
+	break;
+      case opc_istore:
+      case opc_fstore:
+      case opc_astore:
+	Thumb2_Store(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_lstore:
+      case opc_dstore:
+	Thumb2_StoreX2(jinfo, code_base[bci+1], stackdepth);
+	break;
+      case opc_istore_0:
+      case opc_istore_1:
+      case opc_istore_2:
+      case opc_istore_3:
+      case opc_fstore_0:
+      case opc_fstore_1:
+      case opc_fstore_2:
+      case opc_fstore_3:
+      case opc_astore_0:
+      case opc_astore_1:
+      case opc_astore_2:
+      case opc_astore_3:
+	Thumb2_Store(jinfo, (opcode - opc_istore_0) & 3, stackdepth);
+	break;
+      case opc_lstore_0:
+      case opc_lstore_1:
+      case opc_lstore_2:
+      case opc_lstore_3:
+      case opc_dstore_0:
+      case opc_dstore_1:
+      case opc_dstore_2:
+      case opc_dstore_3:
+	Thumb2_StoreX2(jinfo, (opcode - opc_istore_0) & 3, stackdepth);
+	break;
+      case opc_iastore:
+      case opc_fastore:
+      case opc_bastore:
+      case opc_castore:
+      case opc_sastore:
+	Thumb2_Xastore(jinfo, opcode);
+	break;
+      case opc_lastore:
+      case opc_dastore:
+	Thumb2_X2astore(jinfo);
+	break;
+
+      case opc_pop:
+      case opc_pop2:
+	Thumb2_Pop(jinfo, opcode - opc_pop + 1);
+	break;
+
+      case opc_dup:
+      case opc_dup_x1:
+      case opc_dup_x2:
+	Thumb2_Dup(jinfo, opcode - opc_dup);
+	break;
+
+      case opc_dup2:
+      case opc_dup2_x1:
+      case opc_dup2_x2:
+	Thumb2_Dup2(jinfo, opcode - opc_dup2);
+	break;
+
+      case opc_swap:
+	Thumb2_Swap(jinfo);
+	break;
+
+      case opc_iadd:
+      case opc_isub:
+      case opc_imul:
+      case opc_ishl:
+      case opc_ishr:
+      case opc_iushr:
+      case opc_iand:
+      case opc_ior:
+      case opc_ixor:
+	Thumb2_iOp(jinfo, opcode);
+	break;
+
+      case opc_ladd:
+      case opc_lsub:
+      case opc_land:
+      case opc_lor:
+      case opc_lxor:
+	Thumb2_lOp(jinfo, opcode);
+	break;
+
+      case opc_lshl: {
+	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
+	unsigned loc1, loc2;
+
+	Thumb2_Fill(jinfo, 3);
+	shift = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
+	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
+	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
+	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
+	tst_imm(jinfo->codebuf, shift, 32);
+	loc1 = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res_lo, 0);
+	dop_reg(jinfo->codebuf, DP_LSL, res_hi, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
+	dop_reg(jinfo->codebuf, DP_LSL, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_LSL, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
+	dop_reg(jinfo->codebuf, DP_LSR, ARM_IP, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_ORR, res_hi, res_hi, ARM_IP, SHIFT_LSL, 0);
+	branch_narrow_patch(jinfo->codebuf, loc2);
+	break;
+      }
+
+      case opc_lushr: {
+	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
+	unsigned loc1, loc2;
+
+	Thumb2_Fill(jinfo, 3);
+	shift = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
+	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
+	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
+	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
+	tst_imm(jinfo->codebuf, shift, 32);
+	loc1 = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res_hi, 0);
+	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
+	dop_reg(jinfo->codebuf, DP_LSR, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
+	dop_reg(jinfo->codebuf, DP_LSL, ARM_IP, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_ORR, res_lo, res_lo, ARM_IP, SHIFT_LSL, 0);
+	branch_narrow_patch(jinfo->codebuf, loc2);
+	break;
+      }
+
+      case opc_lshr: {
+	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
+	unsigned loc1, loc2;
+
+	Thumb2_Fill(jinfo, 3);
+	shift = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
+	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
+	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
+	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
+	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
+	tst_imm(jinfo->codebuf, shift, 32);
+	loc1 = forward_short(jinfo->codebuf);
+	asr_imm(jinfo->codebuf, res_hi, lho_hi, 31);
+	dop_reg(jinfo->codebuf, DP_ASR, res_lo, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
+	dop_reg(jinfo->codebuf, DP_ASR, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
+	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
+	dop_reg(jinfo->codebuf, DP_LSL, ARM_IP, lho_hi, ARM_IP, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_ORR, res_lo, res_lo, ARM_IP, SHIFT_LSL, 0);
+	branch_narrow_patch(jinfo->codebuf, loc2);
+	break;
+      }
+
+      case opc_lmul:
+	Thumb2_lmul(jinfo);
+	break;
+
+      case opc_fadd:
+      case opc_fsub:
+      case opc_fmul:
+      case opc_fdiv:
+	Thumb2_fOp(jinfo, opcode);
+	break;
+
+      case opc_dadd:
+      case opc_dsub:
+      case opc_dmul:
+      case opc_ddiv:
+	Thumb2_dOp(jinfo, opcode);
+	break;
+
+      case opc_fcmpl:
+      case opc_fcmpg: {
+	Thumb2_Stack *jstack = jinfo->jstack;
+	unsigned rho, lho, res;
+	unsigned loc1, loc2, loc_ne;
+
+	Thumb2_Fill(jinfo, 2);
+	rho = POP(jstack);
+	lho = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	res = PUSH(jstack, JSTACK_REG(jstack));
+	vmov_reg_s_toVFP(jinfo->codebuf, VFP_S0, lho);
+	vmov_reg_s_toVFP(jinfo->codebuf, VFP_S1, rho);
+	vcmp_reg_s(jinfo->codebuf, VFP_S0, VFP_S1, 1);
+	mov_imm(jinfo->codebuf, res, opcode == opc_fcmpl ? 1 : -1);
+	vmrs(jinfo->codebuf, ARM_PC);
+	loc1 = forward_short(jinfo->codebuf);
+	dop_imm_preserve(jinfo->codebuf, DP_RSB, res, res, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	vcmp_reg_s(jinfo->codebuf, VFP_S0, VFP_S1, 0);
+	loc_ne = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res, 0);
+	bcc_patch(jinfo->codebuf, opcode == opc_fcmpl ? COND_GT : COND_MI, loc1);
+	bcc_patch(jinfo->codebuf, opcode == opc_fcmpl ? COND_MI : COND_GT, loc2);
+	bcc_patch(jinfo->codebuf, COND_NE, loc_ne);
+	break;
+      }
+
+      case opc_dcmpl:
+      case opc_dcmpg: {
+	Thumb2_Stack *jstack = jinfo->jstack;
+	unsigned rho_lo, rho_hi, lho_lo, lho_hi, res;
+	unsigned loc1, loc2, loc_ne;
+
+	Thumb2_Fill(jinfo, 4);
+	rho_lo = POP(jstack);
+	rho_hi = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	res = PUSH(jstack, JSTACK_REG(jstack));
+	vmov_reg_d_toVFP(jinfo->codebuf, VFP_S0, lho_lo, lho_hi);
+	vmov_reg_d_toVFP(jinfo->codebuf, VFP_S1, rho_lo, rho_hi);
+	vcmp_reg_d(jinfo->codebuf, VFP_S0, VFP_S1, 1);
+	mov_imm(jinfo->codebuf, res, opcode == opc_dcmpl ? 1 : -1);
+	vmrs(jinfo->codebuf, ARM_PC);
+	loc1 = forward_short(jinfo->codebuf);
+	dop_imm_preserve(jinfo->codebuf, DP_RSB, res, res, 0);
+	loc2 = forward_short(jinfo->codebuf);
+	vcmp_reg_d(jinfo->codebuf, VFP_S0, VFP_S1, 0);
+	loc_ne = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res, 0);
+	bcc_patch(jinfo->codebuf, opcode == opc_dcmpl ? COND_GT : COND_MI, loc1);
+	bcc_patch(jinfo->codebuf, opcode == opc_dcmpl ? COND_MI : COND_GT, loc2);
+	bcc_patch(jinfo->codebuf, COND_NE, loc_ne);
+	break;
+      }
+
+      case opc_drem:
+      case opc_lrem:
+      case opc_ldiv: {
+	Reg src[4], dst[4];
+
+	Thumb2_Fill(jinfo, 4);
+	src[2] = POP(jstack);
+	src[3] = POP(jstack);
+	src[0] = POP(jstack);
+	src[1] = POP(jstack);
+	Thumb2_Flush(jinfo);
+	dst[0] = ARM_R0;
+	dst[1] = ARM_R1;
+	dst[2] = ARM_R2;
+	dst[3] = ARM_R3;
+	mov_multiple(jinfo->codebuf, dst, src, 4);
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	if (opcode != opc_lrem) {
+	  PUSH(jstack, ARM_R1);
+	  PUSH(jstack, ARM_R0);
+	} else {
+	  PUSH(jstack, ARM_R3);
+	  PUSH(jstack, ARM_R2);
+	}
+	break;
+      }
+
+      case opc_frem:
+      case opc_idiv:
+      case opc_irem: {
+	Reg r_rho, r_lho;
+
+	Thumb2_Fill(jinfo, 2);
+	r_rho = POP(jstack);
+	r_lho = POP(jstack);
+	Thumb2_Flush(jinfo);
+	if (r_rho == ARM_R0) {
+	  if (r_lho == ARM_R1) {
+	    mov_reg(jinfo->codebuf, ARM_IP, r_rho);
+	    mov_reg(jinfo->codebuf, ARM_R0, r_lho);
+	    mov_reg(jinfo->codebuf, ARM_R1, ARM_IP);
+	  } else {
+	    mov_reg(jinfo->codebuf, ARM_R1, r_rho);
+	    mov_reg(jinfo->codebuf, ARM_R0, r_lho);
+	  }
+	} else {
+	  mov_reg(jinfo->codebuf, ARM_R0, r_lho);
+	  mov_reg(jinfo->codebuf, ARM_R1, r_rho);
+	}
+	if (opcode == opc_frem)
+	  bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	else
+	  blx(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_f2i:
+      case opc_i2f: {
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R0, r);
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_f2d:
+      case opc_f2l:
+      case opc_i2d: {
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R0, r);
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	PUSH(jstack, ARM_R1);
+	PUSH(jstack, ARM_R0);
+	break;
+    }
+
+      case opc_d2f:
+      case opc_d2i:
+      case opc_l2d:
+      case opc_d2l:
+      case opc_l2f: {
+	Reg lo, hi;
+
+	Thumb2_Fill(jinfo, 2);
+	lo = POP(jstack);
+	hi = POP(jstack);
+	Thumb2_Flush(jinfo);
+	if (hi == ARM_R0) {
+	  if (lo == ARM_R1) {
+	    mov_reg(jinfo->codebuf, ARM_IP, hi);
+	    mov_reg(jinfo->codebuf, ARM_R0, lo);
+	    mov_reg(jinfo->codebuf, ARM_R1, ARM_IP);
+	  } else {
+	    mov_reg(jinfo->codebuf, ARM_R1, hi);
+	    mov_reg(jinfo->codebuf, ARM_R0, lo);
+	  }
+	} else {
+	  mov_reg(jinfo->codebuf, ARM_R0, lo);
+	  mov_reg(jinfo->codebuf, ARM_R1, hi);
+	}
+	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
+	if (opcode == opc_l2d || opcode == opc_d2l) PUSH(jstack, ARM_R1);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_ineg:
+	Thumb2_iNeg(jinfo, opcode);
+	break;
+
+      case opc_lneg:
+	Thumb2_lNeg(jinfo, opcode);
+	break;
+
+      case opc_fneg:
+	Thumb2_fNeg(jinfo, opcode);
+	break;
+
+      case opc_dneg:
+	Thumb2_dNeg(jinfo);
+	break;
+
+      case opc_i2l: {
+	unsigned r, r_res_lo, r_res_hi;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Spill(jinfo, 2, 0);
+	r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
+	r_res_lo = PUSH(jstack, JSTACK_REG(jstack));
+	if (r == r_res_hi) {
+	  SWAP(jstack);
+	  r_res_hi = r_res_lo;
+	  r_res_lo = r;
+	}
+	mov_reg(jinfo->codebuf, r_res_lo, r);
+	asr_imm(jinfo->codebuf, r_res_hi, r, 31);
+	break;
+      }
+
+      case opc_l2i: {
+	unsigned r_lo, r_hi;
+	unsigned r;
+
+	Thumb2_Fill(jinfo, 2);
+	r_lo = POP(jstack);
+	r_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r = PUSH(jstack, r_lo);
+	break;
+      }
+
+      case opc_i2b: {
+	unsigned r_src, r_dst;
+
+	Thumb2_Fill(jinfo, 1);
+	r_src = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_dst = PUSH(jstack, JSTACK_REG(jstack));
+	sxtb(jinfo->codebuf, r_dst, r_src);
+	break;
+      }
+
+      case opc_i2s: {
+	unsigned r_src, r_dst;
+
+	Thumb2_Fill(jinfo, 1);
+	r_src = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_dst = PUSH(jstack, JSTACK_REG(jstack));
+	sxth(jinfo->codebuf, r_dst, r_src);
+	break;
+      }
+
+      case opc_i2c: {
+	unsigned r_src, r_dst;
+
+	Thumb2_Fill(jinfo, 1);
+	r_src = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_dst = PUSH(jstack, JSTACK_REG(jstack));
+	uxth(jinfo->codebuf, r_dst, r_src);
+	break;
+      }
+
+      case opc_lcmp: {
+	unsigned lho_lo, lho_hi;
+	unsigned rho_lo, rho_hi;
+	unsigned r_tmp_lo, r_tmp_hi;
+	unsigned res;
+	unsigned loc_lt, loc_eq;
+
+	Thumb2_Fill(jinfo, 4);
+	rho_lo = POP(jstack);
+	rho_hi = POP(jstack);
+	lho_lo = POP(jstack);
+	lho_hi = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	res = JSTACK_REG(jstack);
+	PUSH(jstack, res);
+	r_tmp_lo = Thumb2_Tmp(jinfo, (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi));
+	r_tmp_hi = Thumb2_Tmp(jinfo, (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi)|(1<<r_tmp_lo));
+	dop_reg(jinfo->codebuf, DP_SUB, r_tmp_lo, lho_lo, rho_lo, SHIFT_LSL, 0);
+	dop_reg(jinfo->codebuf, DP_SBC, r_tmp_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
+	mov_imm(jinfo->codebuf, res, (unsigned)-1);
+	loc_lt = forward_short(jinfo->codebuf);
+	dop_reg(jinfo->codebuf, DP_ORR, res, r_tmp_lo, r_tmp_hi, SHIFT_LSL, 0);
+	loc_eq = forward_short(jinfo->codebuf);
+	mov_imm(jinfo->codebuf, res, 1);
+	bcc_patch(jinfo->codebuf, COND_LT, loc_lt);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc_eq);
+	break;
+      }
+
+      case opc_iinc: {
+	unsigned local = code_base[bci+1];
+	int constant = GET_JAVA_S1(code_base+bci+2);
+	unsigned r = jinfo->jregs->r_local[local];
+
+	if (!r) {
+	  int nlocals = jinfo->method->max_locals();
+	  r = Thumb2_Tmp(jinfo, 0);
+	  stackdepth -= jstack->depth;
+	  load_local(jinfo, r, local, stackdepth);
+	  add_imm(jinfo->codebuf, r, r, constant);
+	  store_local(jinfo, r, local, stackdepth);
+	} else {
+	  Thumb2_Corrupt(jinfo, r, 0);
+	  add_imm(jinfo->codebuf, r, r, constant);
+	}
+	break;
+      }
+
+      case opc_getfield: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	Reg r_obj;
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+ 	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_GETFIELD_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_GETFIELD_DW;
+	  if (c == 'B' || c == 'Z') handler = H_GETFIELD_SB;
+	  if (c == 'C') handler = H_GETFIELD_H;
+	  if (c == 'S') handler = H_GETFIELD_SH;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi;
+	  Thumb2_Fill(jinfo, 1);
+	  r_obj = POP(jstack);
+	  Thumb2_Spill(jinfo, 2, 0);
+	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+	  Thumb2_load_long(jinfo, r_lo, r_hi, r_obj, field_offset,
+			   cache->is_volatile());
+	} else {
+	  Reg r;
+
+	  Thumb2_Fill(jinfo, 1);
+	  r_obj = POP(jstack);
+	  Thumb2_Spill(jinfo, 1, 0);
+	  r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	  if (tos_type == btos)
+	    ldrsb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == ctos)
+	    ldrh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == stos)
+	    ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else
+	    ldr_imm(jinfo->codebuf, r, r_obj, field_offset);
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_getstatic: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_GETSTATIC_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_GETSTATIC_DW;
+	  if (c == 'B' || c == 'Z') handler = H_GETSTATIC_SB;
+	  if (c == 'C') handler = H_GETSTATIC_H;
+	  if (c == 'S') handler = H_GETSTATIC_SH;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+	JDEBUG_( tty->print("f2_as_index getstatic %d: %s: %s %d\n", index , name->as_C_string(), sig->as_C_string(), field_offset); );
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi, r_addr;
+	  Thumb2_Spill(jinfo, 2, 0);
+	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
+	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
+	  r_addr = Thumb2_Tmp(jinfo, (1<<r_hi) | (1<<r_lo));
+	  load_istate(jinfo, r_lo, ISTATE_CONSTANTS, stackdepth+2);
+	  ldr_imm(jinfo->codebuf, r_addr, r_lo, CP_OFFSET + (index << 4) + 4);
+	  Thumb2_load_long(jinfo, r_lo, r_hi, r_addr, field_offset,
+			   cache->is_volatile());
+	} else {
+	  Reg r;
+	  Thumb2_Spill(jinfo, 1, 0);
+	  r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	  load_istate(jinfo, r, ISTATE_CONSTANTS, stackdepth+1);
+	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4);
+	  if (tos_type == btos)
+	    ldrsb_imm(jinfo->codebuf, r, r, field_offset);
+	  else if (tos_type == ctos)
+	    ldrh_imm(jinfo->codebuf, r, r, field_offset);
+	  else if (tos_type == stos)
+	    ldrsh_imm(jinfo->codebuf, r, r, field_offset);
+	  else
+	    ldr_imm(jinfo->codebuf, r, r, field_offset);
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_putfield: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	Reg r_obj;
+
+        cache = cp->entry_at(index);
+
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_PUTFIELD_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_PUTFIELD_DW;
+	  if (c == 'B' || c == 'Z') handler = H_PUTFIELD_B;
+	  if (c == 'C' || c == 'S') handler = H_PUTFIELD_H;
+ 	  if (c == '[' || c == 'L') handler = H_PUTFIELD_A;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+
+	  break;
+	}
+
+	if (cache->is_volatile())
+	  storeBarrier(jinfo->codebuf);
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi;
+	  Thumb2_Fill(jinfo, 3);
+	  r_lo = POP(jstack);
+	  r_hi = POP(jstack);
+	  r_obj = POP(jstack);
+	  Thumb2_store_long(jinfo, r_lo, r_hi, r_obj, field_offset, cache->is_volatile());
+	} else {
+	  Reg r;
+	  Thumb2_Fill(jinfo, 2);
+	  r = POP(jstack);
+	  r_obj = POP(jstack);
+	  if (tos_type == btos)
+	    strb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == ctos | tos_type == stos)
+	    strh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else {
+	    str_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    if (tos_type == atos) {
+	      Thumb2_Flush(jinfo);
+	      mov_reg(jinfo->codebuf, ARM_R0, r_obj);
+	      bl(jinfo->codebuf, handlers[H_APUTFIELD]);
+	    }
+	  }
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_putstatic: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  int java_index = GET_NATIVE_U2(code_base+bci+1);
+	  constantPoolOop pool = jinfo->method->constants();
+	  Symbol *sig = pool->signature_ref_at(java_index);
+	  const jbyte *base = sig->base();
+	  jbyte c = *base;
+	  int handler = H_PUTSTATIC_WORD;
+
+	  if (c == 'J' || c == 'D') handler = H_PUTSTATIC_DW;
+	  if (c == 'B' || c == 'Z') handler = H_PUTSTATIC_B;
+	  if (c == 'C' || c == 'S') handler = H_PUTSTATIC_H;
+	  if (c == '[' || c == 'L') handler = H_PUTSTATIC_A;
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_local_refs(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[handler]);
+	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	if (cache->is_volatile())
+	  storeBarrier(jinfo->codebuf);
+
+	TosState tos_type = cache->flag_state();
+	int field_offset = cache->f2_as_index();
+	Reg r_obj;
+
+	if (tos_type == ltos || tos_type == dtos) {
+	  Reg r_lo, r_hi;
+	  Thumb2_Fill(jinfo, 2);
+	  r_lo = POP(jstack);
+	  r_hi = POP(jstack);
+	  Thumb2_Spill(jinfo, 1, (1<<r_lo)|(1<<r_hi));
+	  r_obj = JSTACK_PREFER(jstack, ~((1<<r_lo)|(1<<r_hi)));
+	  JASSERT(r_obj != r_lo && r_obj != r_hi, "corruption in putstatic");
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-2);
+	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4);
+	  Thumb2_store_long(jinfo, r_lo, r_hi, r_obj, field_offset, cache->is_volatile());
+	} else {
+	  Reg r;
+	  Thumb2_Fill(jinfo, 1);
+	  r = POP(jstack);
+	  Thumb2_Spill(jinfo, 1, (1<<r));
+	  r_obj = JSTACK_PREFER(jstack, ~(1<<r));
+	  JASSERT(r_obj != r, "corruption in putstatic");
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-1);
+	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4);
+	  if (tos_type == btos)
+	    strb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else if (tos_type == ctos | tos_type == stos)
+	    strh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	  else {
+	    str_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    if (tos_type == atos) {
+	      Thumb2_Flush(jinfo);
+	      mov_reg(jinfo->codebuf, ARM_R0, r_obj);
+	      bl(jinfo->codebuf, handlers[H_APUTFIELD]);
+	    }
+	  }
+	}
+
+	if (cache->is_volatile())
+	  fullBarrier(jinfo->codebuf);
+
+	break;
+      }
+
+      case opc_invokevirtual:
+      case opc_invokestatic:
+      case opc_invokespecial: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	unsigned loc;
+	methodOop callee;
+
+	// Call Debug if we're about to enter a synchronized method.
+#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
+	if (DebugSwitch && jinfo->method->is_synchronized()) {
+	  stm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+	  add_imm(jinfo->codebuf, ARM_R0, ISTATE_REG(jinfo), ISTATE_OFFSET(jinfo, stackdepth, 0));
+	  mov_imm(jinfo->codebuf, ARM_IP, (u32)Debug);
+	  load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
+	  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
+	  store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
+	  blx_reg(jinfo->codebuf, ARM_IP);
+	  ldm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, POP_FD, 1);
+	}
+#undef DEBUG_REGSET
+
+        cache = cp->entry_at(index);
+        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
+	  Thumb2_Flush(jinfo);
+	  Thumb2_save_all_locals(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf,
+	    handlers[opcode == opc_invokestatic ? H_INVOKESTATIC :
+		     opcode == opc_invokespecial ? H_INVOKESPECIAL : H_INVOKEVIRTUAL]);
+	  Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	callee = opcode == opc_invokevirtual ? (methodOop)cache->f2_as_index() : (methodOop)cache->f1_as_instance();
+
+	if (opcode != opc_invokevirtual || cache->is_vfinal()) {
+	  if (handle_special_method(callee, jinfo, stackdepth))
+	    break;
+	}
+
+	if ((opcode != opc_invokevirtual || cache->is_vfinal()) && callee->is_accessor()) {
+	  u1 *code = callee->code_base();
+	  int index = GET_NATIVE_U2(&code[2]);
+	  constantPoolCacheOop callee_cache = callee->constants()->cache();
+	  ConstantPoolCacheEntry *entry = callee_cache->entry_at(index);
+	  Reg r_obj, r;
+
+	  if (entry->is_resolved(Bytecodes::_getfield)) {
+	    JASSERT(cache->parameter_size() == 1, "not 1 parameter to accessor");
+
+	    TosState tos_type = entry->flag_state();
+	    int field_offset = entry->f2_as_index();
+
+	    JASSERT(tos_type == btos || tos_type == ctos || tos_type == stos || tos_type == atos || tos_type == itos, "not itos or atos");
+
+	    Thumb2_Fill(jinfo, 1);
+	    r_obj = POP(jstack);
+	    Thumb2_Spill(jinfo, 1, 0);
+	    r = JSTACK_REG(jstack);
+	    PUSH(jstack, r);
+	    if (tos_type == btos)
+	      ldrb_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    else if (tos_type == ctos)
+	      ldrh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    else if (tos_type == stos)
+	      ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    else
+	      ldr_imm(jinfo->codebuf, r, r_obj, field_offset);
+	    break;
+	  }
+	}
+
+ 	Thumb2_Flush(jinfo);
+	if (OSPACE) {
+	  Thumb2_save_all_locals(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[
+	      opcode == opc_invokestatic ? H_INVOKESTATIC_RESOLVED :
+	      opcode == opc_invokespecial ? H_INVOKESPECIAL_RESOLVED :
+	      cache->is_vfinal() ? H_INVOKEVFINAL : H_INVOKEVIRTUAL_RESOLVED]);
+	  Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
+ 	mov_imm(jinfo->codebuf, ARM_R1, 0);
+	if (opcode != opc_invokestatic)
+ 	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int));
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  load_istate(jinfo, ARM_R0, ISTATE_CONSTANTS, stackdepth);
+	ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+	if (opcode != opc_invokestatic)
+	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4);
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0,
+		CP_OFFSET + (index << 4) + (opcode == opc_invokevirtual ? 8 : 4));
+	else
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R3, INSTANCEKLASS_VTABLE_OFFSET + cache->f2_as_index() * 4);
+	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
+ 	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP);
+	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP);
+ 	ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED);
+	store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
+ 	str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+ 	Thumb2_save_all_locals(jinfo, stackdepth);
+	sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
+ 	ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0);
+	store_istate(jinfo, Rstack, ISTATE_STACK, stackdepth+1);
+	add_imm(jinfo->codebuf, ARM_R3, ARM_R3, FAST_ENTRY_OFFSET);
+ 	blx_reg(jinfo->codebuf, ARM_R3);
+ 	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
+	stackdepth = STACKDEPTH(jinfo, bc_stackinfo[bci+len]);
+	ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP);
+	load_istate(jinfo, ARM_R2, ISTATE_STACK_LIMIT, stackdepth);
+ 	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME);
+	Thumb2_restore_all_locals(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, 0);   // set last SP to zero
+					      // before setting FP
+	str_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_LAST_JAVA_SP);
+	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME);
+	Thumb2_restore_all_locals(jinfo, stackdepth);
+	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
+	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC);
+	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP);
+	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP);
+	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_LAST_JAVA_SP);
+	cmp_imm(jinfo->codebuf, ARM_R3, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS], COND_NE);
+	break;
+      }
+
+      case opc_invokeinterface: {
+	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
+        ConstantPoolCacheEntry* cache;
+	int index = GET_NATIVE_U2(code_base+bci+1);
+	unsigned loc, loc_inc_ex;
+
+	// Currently we just call the unresolved invokeinterface entry for resolved /
+	// unresolved alike!
+	Thumb2_Flush(jinfo);
+	Thumb2_save_all_locals(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	mov_imm(jinfo->codebuf, ARM_R1, index);
+	blx(jinfo->codebuf, handlers[H_INVOKEINTERFACE]);
+	Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	break;
+      }
+
+    case opc_invokedynamic:
+      {
+	Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
+	break;
+      }
+
+    case opc_fast_aldc_w:
+    case opc_fast_aldc:
+      {
+	unsigned index = (opcode == (unsigned)opc_fast_aldc) ?
+				code_base[bci+1] : GET_NATIVE_U2(code_base+bci+1);
+	constantPoolOop constants = jinfo->method->constants();
+	ConstantPoolCacheEntry* cpce = constants->cache()->entry_at(index);
+        if (! cpce->is_f1_null()) {
+	  Thumb2_Spill(jinfo, 1, 0);
+	  int r = JSTACK_REG(jstack);
+	  PUSH(jstack, r);
+	  ldr_imm(jinfo->codebuf, r, Ristate, ISTATE_CONSTANTS);
+	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4); // offset to cache->f1_as_instance()
+	} else {
+	  Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
+	}
+	break;
+      }
+
+      case opc_jsr_w:
+      case opc_jsr: {
+	int offset = opcode == opc_jsr ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
+	Reg r;
+
+	Thumb2_Spill(jinfo, 1, 0);
+	r = JSTACK_REG(jstack);
+	PUSH(jstack, r);
+	mov_imm(jinfo->codebuf, r, bci + ((opcode == opc_jsr) ? 3 : 5));
+	Thumb2_Flush(jinfo);
+	bci = Thumb2_Goto(jinfo, bci, offset, len);
+	len = 0;
+	break;
+      }
+
+      case opc_ret: {
+	Thumb2_Exit(jinfo, H_RET, bci, stackdepth);
+	break;
+      }
+
+      case opc_goto:
+      case opc_goto_w: {
+	int offset = opcode == opc_goto ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
+	Thumb2_Flush(jinfo);
+	bci = Thumb2_Goto(jinfo, bci, offset, len, stackdepth);
+	len = 0;
+	break;
+      }
+
+      case opc_athrow:
+	Thumb2_Exit(jinfo, H_ATHROW, bci, stackdepth);
+	break;
+
+      case opc_ifeq:
+      case opc_ifne:
+      case opc_iflt:
+      case opc_ifge:
+      case opc_ifgt:
+      case opc_ifle:
+      case opc_ifnull:
+      case opc_ifnonnull: {
+	Reg r;
+	unsigned cond = opcode - opc_ifeq;
+	Thumb2_Cond_Safepoint(jinfo, stackdepth, bci);
+	if (opcode >= opc_ifnull) cond = opcode - opc_ifnull;
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	cmp_imm(jinfo->codebuf, r, 0);
+	bci = Thumb2_Branch(jinfo, bci, cond);
+	len = 0;
+	break;
+      }
+
+      case opc_if_icmpeq:
+      case opc_if_icmpne:
+      case opc_if_icmplt:
+      case opc_if_icmpge:
+      case opc_if_icmpgt:
+      case opc_if_icmple:
+      case opc_if_acmpeq:
+      case opc_if_acmpne: {
+	Reg r_lho, r_rho;
+	unsigned cond = opcode - opc_if_icmpeq;
+	Thumb2_Cond_Safepoint(jinfo, stackdepth, bci);
+	if (opcode >= opc_if_acmpeq) cond = opcode - opc_if_acmpeq;
+	Thumb2_Fill(jinfo, 2);
+	r_rho = POP(jstack);
+	r_lho = POP(jstack);
+	Thumb2_Flush(jinfo);
+	cmp_reg(jinfo->codebuf, r_lho, r_rho);
+	bci = Thumb2_Branch(jinfo, bci, cond);
+	len = 0;
+	break;
+      }
+
+      case opc_return:
+      case opc_dreturn:
+      case opc_lreturn:
+      case opc_ireturn:
+      case opc_freturn:
+      case opc_areturn:
+	Thumb2_Return(jinfo, opcode, bci, stackdepth);
+	break;
+
+      case opc_return_register_finalizer: {
+	Thumb2_Stack *jstack = jinfo->jstack;
+	Reg r, r_tmp;
+	unsigned loc_eq;
+
+	Thumb2_Flush(jinfo);
+	Thumb2_Load(jinfo, 0, stackdepth);
+	r = POP(jstack);
+	r_tmp = Thumb2_Tmp(jinfo, (1<<r));
+	ldr_imm(jinfo->codebuf, r_tmp, r, 4);
+	ldr_imm(jinfo->codebuf, r_tmp, r_tmp, KLASS_PART+KLASS_ACCESSFLAGS);
+	tst_imm(jinfo->codebuf, r_tmp, JVM_ACC_HAS_FINALIZER);
+	loc_eq = forward_short(jinfo->codebuf);
+	Thumb2_save_local_refs(jinfo, stackdepth);
+	mov_reg(jinfo->codebuf, ARM_R1, r);
+	load_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
+	ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD);
+	add_imm(jinfo->codebuf, ARM_R0, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	store_istate(jinfo, ARM_R0, ISTATE_BCP, stackdepth);
+	sub_imm(jinfo->codebuf, ARM_R0, Rstack, 4);
+	store_istate(jinfo, ARM_R0, ISTATE_STACK, stackdepth);
+
+	mov_reg(jinfo->codebuf, ARM_R0, Rthread);
+	mov_imm(jinfo->codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc);
+	blx_reg(jinfo->codebuf, ARM_R3);
+
+	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC);
+	cmp_imm(jinfo->codebuf, ARM_R3, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	bcc_patch(jinfo->codebuf, COND_EQ, loc_eq);
+	Thumb2_Return(jinfo, opc_return, bci, stackdepth);
+	break;
+      }
+
+      case opc_new: {
+	unsigned loc;
+
+	Thumb2_Flush(jinfo);
+	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+      Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_NEW]);
+      Thumb2_restore_local_refs(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_aastore: {
+	Reg src[3], dst[3];
+	unsigned loc;
+
+	Thumb2_Fill(jinfo, 3);
+	src[0] = POP(jstack);	// value
+	src[1] = POP(jstack);	// index
+	src[2] = POP(jstack);	// arrayref
+	Thumb2_Flush(jinfo);
+	dst[0] = ARM_R1;
+	dst[1] = ARM_R2;
+	dst[2] = ARM_R3;
+	mov_multiple(jinfo->codebuf, dst, src, 3);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+      Thumb2_save_local_refs(jinfo, stackdepth - 3);	// 3 args popped above
+	bl(jinfo->codebuf, handlers[H_AASTORE]);
+      Thumb2_restore_local_refs(jinfo, stackdepth - 3);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	break;
+      }
+
+      case opc_instanceof: {
+	unsigned loc;
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R2, r);
+	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+      Thumb2_save_local_refs(jinfo, stackdepth - 1);
+	bl(jinfo->codebuf, handlers[H_INSTANCEOF]);
+      Thumb2_restore_local_refs(jinfo, stackdepth - 1);	// 1 arg popped above
+	cmp_imm(jinfo->codebuf, ARM_R0, (unsigned)-1);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_checkcast: {
+	unsigned loc;
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = TOS(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R2, r);
+	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+      Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_CHECKCAST]);
+      Thumb2_restore_local_refs(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	break;
+      }
+
+      case opc_monitorenter:
+	Thumb2_Flush(jinfo);
+	Thumb2_save_all_locals(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	bl(jinfo->codebuf, handlers[H_MONITORENTER]);
+	Thumb2_restore_all_locals(jinfo, stackdepth);
+	break;
+
+      case opc_monitorexit: {
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R1, r);
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+        Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_MONITOREXIT]);
+        Thumb2_restore_local_refs(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_NE);
+	break;
+      }
+
+      case opc_newarray: {
+	Reg r;
+	unsigned loc;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R2, r);
+	mov_imm(jinfo->codebuf, ARM_R1, code_base[bci+1]);
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+      Thumb2_save_local_refs(jinfo, stackdepth-1);
+	bl(jinfo->codebuf, handlers[H_NEWARRAY]);
+      Thumb2_restore_local_refs(jinfo, stackdepth-1);
+	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	mov_imm(jinfo->codebuf, ARM_R2, 0);
+  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_anewarray: {
+	Reg r;
+	unsigned loc;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R3, r);
+	mov_imm(jinfo->codebuf, ARM_R2, GET_JAVA_U2(code_base+bci+1));
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+      Thumb2_save_local_refs(jinfo, stackdepth-1);
+	bl(jinfo->codebuf, handlers[H_ANEWARRAY]);
+      Thumb2_restore_local_refs(jinfo, stackdepth-1);
+	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	mov_imm(jinfo->codebuf, ARM_R2, 0);
+  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_multianewarray: {
+	unsigned loc;
+
+	Thumb2_Flush(jinfo);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	mov_imm(jinfo->codebuf, ARM_R1, code_base[bci+3] * 4);
+      Thumb2_save_local_refs(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_MULTIANEWARRAY]);
+      Thumb2_restore_local_refs(jinfo, stackdepth - code_base[bci+3]);
+	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT);
+	mov_imm(jinfo->codebuf, ARM_R2, 0);
+  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION], COND_EQ);
+	PUSH(jstack, ARM_R0);
+	break;
+      }
+
+      case opc_arraylength: {
+	Reg r_obj, r_len;
+
+	Thumb2_Fill(jinfo, 1);
+	r_obj = POP(jstack);
+	Thumb2_Spill(jinfo, 1, 0);
+	r_len = JSTACK_REG(jstack);
+	PUSH(jstack, r_len);
+	ldr_imm(jinfo->codebuf, r_len, r_obj, 8);
+	break;
+      }
+
+      case opc_lookupswitch: {
+	unsigned w;
+	unsigned nbci;
+	int def;
+	int npairs;	// The Java spec says signed but must be >= 0??
+	unsigned *table, *tablep;
+	unsigned r;
+	unsigned oldidx;
+	unsigned table_loc;
+	int i;
+
+	nbci = bci & ~3;
+	w = *(unsigned int *)(code_base + nbci + 4);
+	def = bci + (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 8);
+	npairs = (int)BYTESEX_REVERSE(w);
+	table = (unsigned int *)(code_base + nbci + 12);
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+
+	Thumb2_Flush(jinfo);
+
+	table_loc = out_loc(jinfo->codebuf);
+	for (i = 0, tablep = table; i < npairs; i++) {
+	  unsigned match;
+
+	  w = tablep[0];
+	  match = BYTESEX_REVERSE(w);
+	  tablep += 2;
+	  cmp_imm(jinfo->codebuf, r, match);
+	  forward_long(jinfo->codebuf);
+	}
+	forward_long(jinfo->codebuf);
+	Thumb2_codegen(jinfo, bci+len);
+
+	oldidx = codebuf->idx;
+	codebuf->idx = table_loc >> 1;
+	for (i = 0, tablep = table; i < npairs; i++) {
+	  unsigned match;
+	  unsigned dest;
+	  unsigned loc;
+
+	  w = tablep[0];
+	  match = BYTESEX_REVERSE(w);
+	  w = tablep[1];
+	  dest = bci + (int)BYTESEX_REVERSE(w);
+	  tablep += 2;
+	  cmp_imm(jinfo->codebuf, r, match);
+	  JASSERT(jinfo->bc_stackinfo[dest] & BC_COMPILED, "code not compiled");
+	  loc = forward_long(jinfo->codebuf);
+	  branch_patch(jinfo->codebuf, COND_EQ, loc, jinfo->bc_stackinfo[dest] & ~BC_FLAGS_MASK);
+	}
+	JASSERT(jinfo->bc_stackinfo[def] & BC_COMPILED, "default in lookupswitch not compiled");
+	branch_uncond_patch(jinfo->codebuf, out_loc(jinfo->codebuf), jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
+	codebuf->idx = oldidx;
+
+	bci = (unsigned)-1;
+	len = 0;
+
+	break;
+      }
+
+      case opc_tableswitch: {
+	int low, high, i;
+	unsigned w;
+	unsigned *table, *tablep;
+	unsigned nbci;
+	int def;
+	unsigned loc, table_loc;
+	unsigned r, rs;
+	unsigned oldidx;
+	unsigned negative_offsets, negative_branch_table;
+
+	nbci = bci & ~3;
+	w = *(unsigned int *)(code_base + nbci + 8);
+	low = (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 12);
+	high = (int)BYTESEX_REVERSE(w);
+	w = *(unsigned int *)(code_base + nbci + 4);
+	def = bci + (int)BYTESEX_REVERSE(w);
+	table = (unsigned int *)(code_base + nbci + 16);
+
+	Thumb2_Fill(jinfo, 1);
+	rs = POP(jstack);
+	Thumb2_Flush(jinfo);
+        r = rs;
+        if (low) {
+	  r = Thumb2_Tmp(jinfo, (1<<rs));
+	  sub_imm(jinfo->codebuf, r, rs, low);
+        }
+	cmp_imm(jinfo->codebuf, r, (high-low)+1);
+	loc = 0;
+	if (jinfo->bc_stackinfo[def] & BC_COMPILED)
+	  branch(jinfo->codebuf, COND_CS, jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
+	else
+	  loc = forward_long(jinfo->codebuf);
+	tbh(jinfo->codebuf, ARM_PC, r);
+	table_loc = out_loc(jinfo->codebuf);
+	negative_offsets = 0;
+	for (i = low, tablep = table; i <= high; i++) {
+	  int offset;
+	  w = *tablep++;
+	  offset = (int)BYTESEX_REVERSE(w);
+	  if (offset < 0) negative_offsets++;
+	  out_16_data(jinfo->codebuf, 0);
+	}
+        if (!Thumb2) out_align(jinfo->codebuf, 4);
+	negative_branch_table = out_loc(jinfo->codebuf);
+	for (i = 0; i < (int)negative_offsets; i++) {
+	  out_32(jinfo->codebuf, 0);
+	}
+
+	Thumb2_codegen(jinfo, bci+len);
+
+	if (loc) {
+	  JASSERT(jinfo->bc_stackinfo[def] & BC_COMPILED, "def not compiled in tableswitch");
+	  branch_patch(jinfo->codebuf, COND_CS, loc, jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
+	}
+
+	oldidx = codebuf->idx;
+	codebuf->idx = table_loc >> 1;
+	for (i = low, tablep = table; i <= high; i++) {
+	  unsigned dest;
+	  int offset;
+
+	  w = *tablep++;
+	  offset = (int)BYTESEX_REVERSE(w);
+	  dest = bci + offset;
+	  JASSERT(jinfo->bc_stackinfo[dest] & BC_COMPILED, "code not compiled");
+	  dest = jinfo->bc_stackinfo[dest] & ~BC_FLAGS_MASK;
+	  if (offset < 0) {
+	    unsigned oldidx;
+            // ECN::FIXME - Is this right?
+	    out_16_data(jinfo->codebuf, (negative_branch_table >> 1) - (table_loc >> 1));
+	    PATCH(negative_branch_table) {
+	      branch_uncond_patch(jinfo->codebuf, out_loc(jinfo->codebuf), dest);
+	      negative_branch_table = out_loc(jinfo->codebuf);
+	    } HCTAP;
+	  } else {
+	    JASSERT((dest & 1) == 0 && (table_loc & 1) == 0, "unaligned code");
+	    offset = (dest >> 1) - (table_loc >> 1);
+            if (!Thumb2) {
+              offset -= 2;
+              JASSERT(offset >= 0, "negative offset!");
+            }
+	    if (offset >= 65536) {
+	      longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+	    }
+	    out_16_data(jinfo->codebuf, offset);
+	  }
+	}
+	codebuf->idx = oldidx;
+	bci = (unsigned)-1;
+	len = 0;
+	break;
+      }
+
+      case opc_wide: {
+	unsigned local = GET_JAVA_U2(code_base + bci + 2);
+	opcode = code_base[bci+1];
+	if (opcode == opc_iinc) {
+	  int constant = GET_JAVA_S2(code_base + bci + 4);
+	  unsigned r = jinfo->jregs->r_local[local];
+	  
+	  if (!r) {
+	    int nlocals = jinfo->method->max_locals();
+	    r = ARM_IP;
+	    stackdepth -= jstack->depth;
+	    load_local(jinfo, r, local, stackdepth);
+	    add_imm(jinfo->codebuf, r, r, constant);
+	    store_local(jinfo, r, local, stackdepth);
+	  } else {
+	    Thumb2_Corrupt(jinfo, r, 0);
+	    add_imm(jinfo->codebuf, r, r, constant);
+	  }
+	} else if (opcode == opc_ret) {
+	  Thumb2_Exit(jinfo, H_RET, bci, stackdepth);
+	} else {
+	  if (opcode == opc_iload ||
+	  	opcode == opc_fload || opcode == opc_aload)
+	    Thumb2_Load(jinfo, local, stackdepth);
+	  else if (opcode == opc_lload || opcode == opc_dload)
+	    Thumb2_LoadX2(jinfo, local, stackdepth);
+	  else if (opcode == opc_istore ||
+	  	opcode == opc_fstore || opcode == opc_astore)
+	    Thumb2_Store(jinfo, local, stackdepth);
+	  else if (opcode == opc_lstore || opcode == opc_dstore)
+	    Thumb2_StoreX2(jinfo, local, stackdepth);
+	  else fatal(err_msg("Undefined wide opcode %d\n", opcode));
+	}
+	break;
+      }
+
+      default:
+	JASSERT(0, "unknown bytecode");
+	break;
+    }
+    bci += len;
+    if (len == 0) {
+      if (start_idx == jinfo->codebuf->idx) SET_START_BCI(start_idx, -1);
+    } else
+      SET_END_BCI(start_idx, bci);
+  }
+}
+
+#define BEG_BCI_OFFSET		0
+#define END_BCI_OFFSET		1
+#define HANDLER_BCI_OFFSET	2
+#define KLASS_INDEX_OFFSET	3
+#define ENTRY_SIZE		4
+
+// Map a compiled-code return address (lr) back to the bytecode index of the
+// exception handler region that covers it.
+//
+// Walks the global compiled-method list for the method whose code region
+// contains lr (the exception table is emitted directly after the code, so
+// the region is [cmethod, cmethod->exception_table]), then scans that
+// method's compiled exception table for the most deeply nested try region
+// covering lr.  If one is found and a register snapshot is supplied, the
+// JIT register-cached locals are written back to the interpreter's locals
+// array (which is indexed downwards, hence locals[-local]).
+//
+//   lr     - return address inside JIT-generated code
+//   method - the interpreted method (supplies the bytecode-level table)
+//   regs   - saved values of the PREGS callee-saved registers, or NULL
+//   locals - interpreter locals array to flush registers into
+//
+// Returns the start bci of the matching try region, or -1 if none covers lr.
+extern "C" int Thumb2_lr_to_bci(unsigned lr, methodOop method, Reg *regs, unsigned *locals)
+{
+  Compiled_Method *cmethod = compiled_method_list;
+  ExceptionTable table(method);
+  int length = table.length();
+
+  while (cmethod) {
+    unsigned *exception_table = cmethod->exception_table;
+    if (exception_table) {
+      unsigned code_base = (unsigned)cmethod;
+      if (code_base <= lr && lr <= (unsigned)exception_table) {
+	int exception_index = -1;
+	unsigned exception_found = 0;
+
+	// Scan ALL entries before deciding.  With nested try/catch blocks we
+	// must pick the most deeply nested region covering lr, i.e. the one
+	// with the greatest start address — so the selection cannot be acted
+	// on until every entry has been examined.
+	for (int i = 0; i < length; i++) {
+	  unsigned offsets = *exception_table++;
+	  unsigned exc_beg = code_base + ((offsets >> 16) << 1);
+	  unsigned exc_end = code_base + ((offsets & 0xffff) << 1);
+
+	  if (exc_beg <= lr && lr <= exc_end && exc_beg > exception_found) {
+	    exception_found = exc_beg;
+	    exception_index = i;
+	  }
+	}
+	if (exception_index >= 0) {
+	  if (regs) {
+	    // Flush register-allocated locals back to the interpreter frame
+	    // so the handler sees up-to-date values.
+	    for (unsigned p = 0; p < PREGS; p++) {
+	      int local = cmethod->regusage[p];
+	      if (local >= 0) {
+		locals[-local] = regs[p];
+	      }
+	    }
+	  }
+	  return table.start_pc(exception_index);
+	}
+      }
+    }
+    cmethod = cmethod->next;
+  }
+  return -1;
+}
+
+// Emit the compiled exception table for cmethod into the code buffer.
+//
+// One 32-bit word is emitted per entry of the method's bytecode exception
+// table, in the same order:
+//
+//   (beg_offset << 16) | end_offset
+//
+// where both fields are halfword offsets from the start of the compiled
+// method to the compiled code of the try region's start/end bcis (taken
+// from bc_stackinfo[], which stores the code offset plus flag bits).
+// Thumb2_lr_to_bci() decodes exactly this format when unwinding.
+//
+// Aborts the compilation (longjmp to compiler_error_env) if an offset does
+// not fit in 16 bits or a region boundary was never compiled (offset 0).
+void Thumb2_generate_exception_table(Compiled_Method *cmethod, Thumb2_Info *jinfo)
+{
+  methodOop method = jinfo->method;
+  ExceptionTable table(method);
+  int length = table.length();
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+
+  cmethod->exception_table = (unsigned *)out_pos(jinfo->codebuf);
+  for (int i = 0; i < length; i++) {
+    int b_bci = table.start_pc(i);
+    int e_bci = table.end_pc(i);
+    unsigned stackinfo;
+    unsigned beg_offset, end_offset;
+
+    stackinfo = bc_stackinfo[b_bci];
+    beg_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
+    stackinfo = bc_stackinfo[e_bci];
+    end_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
+    if (!(beg_offset != 0 && end_offset >= beg_offset && end_offset < 65536)) {
+	longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+    }
+    out_32(jinfo->codebuf, (beg_offset << 16) | (end_offset));
+  }
+}
+
+// Emit the OSR (on-stack replacement) table for a freshly compiled method,
+// followed by the exception table when the method has handlers.
+//
+// OSR table layout (32-bit words):
+//   word 0:    entry count (emitted as 0, patched at the end)
+//   word 1..n: (bci << 16) | halfword code offset
+// One entry is emitted for every bci marked BC_BACK_TARGET (bci 0 is forced
+// to be one, so the method can always be entered from its start).
+// Thumb2_osr_from_bci() performs the reverse lookup at run time.
+//
+// Aborts the compilation (longjmp) if a code offset does not fit in 16 bits.
+void Thumb2_tablegen(Compiled_Method *cmethod, Thumb2_Info *jinfo)
+{
+  unsigned code_size = jinfo->code_size;
+  jubyte *code_base = jinfo->code_base;
+  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
+  unsigned bci;
+  unsigned count = 0;
+  unsigned i;
+  CodeBuf *codebuf = jinfo->codebuf;
+
+  cmethod->osr_table = (unsigned *)out_pos(jinfo->codebuf);
+  // Placeholder for the entry count, patched via *cmethod->osr_table below.
+  out_32(codebuf, 0);
+  bc_stackinfo[0] |= BC_BACK_TARGET;
+  for (bci = 0; bci < code_size;) {
+    unsigned stackinfo = bc_stackinfo[bci];
+    unsigned bytecodeinfo;
+    unsigned opcode;
+
+    if (stackinfo & BC_BACK_TARGET) {
+      unsigned code_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
+      JASSERT(stackinfo & BC_COMPILED, "back branch target not compiled???");
+      if (code_offset >= 65536) {
+	longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+      }
+//      JASSERT(code_offset < (1<<16), "oops, codesize too big");
+      out_32(codebuf, (bci << 16) | code_offset);
+      count++;
+    }
+
+    // Advance to the next bytecode.  Fixed-length bytecodes come straight
+    // from the bcinfo[] table; variable-length ones (tableswitch, wide, ...)
+    // need the runtime to measure them at this bci.
+    opcode = code_base[bci];
+    bytecodeinfo = bcinfo[opcode];
+    if (!BCI_SPECIAL(bytecodeinfo)) {
+      bci += BCI_LEN(bytecodeinfo);
+      continue;
+    } else {
+      int len = Bytecodes::length_for((Bytecodes::Code)opcode);
+      if (len <= 0) {
+	Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
+	len = (Bytecodes::special_length_at
+	       (code,
+		(address)(code_base+bci), (address)(code_base+code_size)));
+      }
+      bci += len;
+    }
+  }
+  // Patch the real entry count into the placeholder word.
+  *cmethod->osr_table = count;
+  if (jinfo->method->has_exception_handler())
+    Thumb2_generate_exception_table(cmethod, jinfo);
+}
+
+extern "C" void Thumb2_Clear_Cache(char *base, char *limit);
+#define IS_COMPILED(e, cb) ((e) >= (unsigned)(cb) && (e) < (unsigned)(cb) + (cb)->size)
+
+// Look up the compiled-code offset for an OSR entry point.
+//
+// The OSR table (built by Thumb2_tablegen) starts with an entry count
+// followed by one word per entry: (bci << 16) | halfword offset.
+// Returns the byte offset of the compiled code for bci, or 0 when the
+// method has no OSR table or bci is not a recorded entry point.
+unsigned Thumb2_osr_from_bci(Compiled_Method *cmethod, unsigned bci)
+{
+  unsigned *entry = cmethod->osr_table;
+
+  if (!entry) return 0;
+
+  unsigned remaining = *entry++;
+  while (remaining-- > 0) {
+    unsigned packed = *entry++;
+
+    if ((packed >> 16) == bci)
+      return (packed & 0xffff) << 1;
+  }
+  return 0;
+}
+
+// Print the top six Java expression-stack slots to the tty as
+// "[index] = value" pairs on a single line (debug tracing helper).
+extern "C" void Debug_Stack(intptr_t *stack)
+{
+  char buf[16];
+
+  tty->print("  Stack:");
+  for (int slot = 0; slot < 6; slot++) {
+    tty->print(" [");
+    sprintf(buf, "%d", slot);
+    tty->print(buf);
+    tty->print("] = ");
+    sprintf(buf, "%08x", (int)stack[slot]);
+    tty->print(buf);
+  }
+  tty->cr();
+}
+
+// Trace helper invoked on entry to a compiled method: prints the callee,
+// the caller and the top stack slots.  The whole body is wrapped in the
+// JDEBUG_ macro, so in non-debug builds it presumably compiles to nothing
+// — the macro's definition is not visible here.
+extern "C" void Debug_MethodEntry(interpreterState istate, intptr_t *stack, methodOop callee)
+{
+ JDEBUG_(
+  if (DebugSwitch) {
+    methodOop method = istate->method();
+    tty->print("Entering ");
+    callee->print_short_name(tty);
+    tty->print(" from ");
+    method->print_short_name(tty);
+    tty->cr();
+    Debug_Stack(stack);
+    tty->flush();
+  }
+ )
+}
+
+// Trace helper invoked on exit from a compiled method: prints the method
+// being left and the top stack slots.  Body is wrapped in the JDEBUG_
+// macro (definition not visible here).
+// NOTE(review): `exc` below is not declared in this function or any scope
+// visible in this file — this can only compile if JDEBUG_ discards its
+// argument, or if `exc` is a global defined elsewhere; verify.
+extern "C" void Debug_MethodExit(interpreterState istate, intptr_t *stack)
+{
+ JDEBUG_(
+  if (DebugSwitch) {
+    methodOop method = istate->method();
+    tty->print("Leaving ");
+    method->print_short_name(tty);
+    tty->cr();
+    Debug_Stack(stack);
+    tty->flush();
+    if (exc) tty->print_cr("Exception %s", exc->print_value_string());
+  }
+ )
+}
+
+// Trace helper invoked when compiled code calls out to another method:
+// prints callee, caller and the top stack slots.  Body is wrapped in the
+// JDEBUG_ macro (definition not visible here).
+extern "C" void Debug_MethodCall(interpreterState istate, intptr_t *stack, methodOop callee)
+{
+ JDEBUG_(
+  if (DebugSwitch) {
+    methodOop method = istate->method();
+    tty->print("Calling ");
+    callee->print_short_name(tty);
+    tty->print(" from ");
+    method->print_short_name(tty);
+    tty->cr();
+    Debug_Stack(stack);
+    tty->flush();
+  }
+ )
+}
+extern "C" void Thumb2_Install(methodOop mh, u32 entry);
+
+extern "C" unsigned cmpxchg_ptr(unsigned new_value, volatile unsigned *ptr, unsigned cmp_value);
+static volatile unsigned compiling;
+
+static unsigned CompileCount = 1000000;
+static unsigned DisassAfter = 0;
+static char *T2CompileOnly = NULL;
+static unsigned MaxCompile = 10000;
+
+#define COMPILE_ONLY	T2CompileOnly
+#define COMPILE_COUNT	CompileCount
+//#define DISASS_AFTER	DisassAfter
+//#define COMPILE_LIST
+
+#ifdef COMPILE_LIST
+static const char *compile_list[] = {
+	0
+};
+#endif
+
+static unsigned compiled_methods = 0;
+
+#ifdef T2_PRINT_STATISTICS
+static unsigned bytecodes_compiled = 0;
+static unsigned arm_code_generated = 0;
+static clock_t total_compile_time = 0;
+#endif
+
+// JIT entry point, called from the interpreter when a method gets hot
+// (typically at a taken backward branch at bytecode index branch_pc).
+//
+// Compiles the method of the current interpreter frame, installs the
+// generated code, and returns a packed Thumb2_Entrypoint (OSR entrypoint
+// for branch_pc plus the register-usage map) as a single 64-bit value.
+// Returns 0 when no compiled entrypoint is available: method excluded or
+// not compilable, too big, already being compiled by another thread, or
+// branch_pc has no OSR entry in the generated table.
+extern "C" unsigned long long Thumb2_Compile(JavaThread *thread, unsigned branch_pc)
+{
+  HandleMark __hm(thread);
+  frame fr = thread->last_frame();
+  methodOop method = fr.interpreter_frame_method();
+  Symbol *name = method->name();
+  Symbol *sig = method->signature();
+  const jbyte *base = sig->base();
+
+  jubyte *code_base = (jubyte *)method->code_base();
+  int code_size = method->code_size();
+  InvocationCounter* ic = method->invocation_counter();
+  InvocationCounter* bc = method->backedge_counter();
+  Thumb2_Info jinfo_str;
+  CodeBuf codebuf_str;
+  Thumb2_Stack jstack_str;
+  Thumb2_Registers jregs_str;
+  int idx;
+  u32 code_handle, slow_entry;
+  Thumb2_CodeBuf *cb = thumb2_codebuf;
+  int rc;
+  char *saved_hp;
+  Compiled_Method *cmethod;
+  u32 compiled_offset;
+  Thumb2_Entrypoint thumb_entry;
+  int compiled_accessor;
+
+  // Only support arch >= 6 for the moment
+  if (!ARCH_GE_V6(CPUInfo)) UseCompiler = false;
+
+  // Bail out (and pin the counters) for methods the compiler must not touch.
+  {
+    bool ignore;
+    methodHandle mh(thread, method);
+    if (!UseCompiler || method->is_not_compilable()
+	|| CompilerOracle::should_exclude(mh, ignore)) {
+      ic->set(ic->state(), 1);
+      bc->set(ic->state(), 1);
+      return 0;
+    }
+  }
+
+  // Already compiled?  Then just look up the OSR entrypoint for branch_pc.
+  slow_entry = *(unsigned *)method->from_interpreted_entry();
+  if (IS_COMPILED(slow_entry, cb)) {
+    cmethod = (Compiled_Method *)(slow_entry & ~TBIT);
+    compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
+    if (compiled_offset == 0) return 0;
+    thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
+    thumb_entry.regusage = cmethod->regusage;
+    return *(unsigned long long *)&thumb_entry;
+  }
+
+  ic->decay();
+  bc->decay();
+
+  // Dont compile anything with code size >= 32K.
+  // We rely on the bytecode index fitting in 16 bits
+  //
+  // Dont compile anything with max stack + maxlocal > 1K
+  // The range of an LDR in T2 is -4092..4092
+  // Othersize we have difficulty access the locals from the stack pointer
+  //
+  if (code_size > THUMB2_MAX_BYTECODE_SIZE ||
+		(method->max_locals() + method->max_stack()) >= 1000) {
+        method->set_not_compilable();
+	return 0;
+  }
+
+#ifdef COMPILE_COUNT
+  if (compiled_methods == COMPILE_COUNT) return 0;
+#endif
+
+#ifdef COMPILE_ONLY
+    if (COMPILE_ONLY && strcmp(name->as_C_string(), COMPILE_ONLY) != 0)
+      return 0;
+#endif
+
+#ifdef COMPILE_LIST
+  {
+	const char **argv = compile_list;
+	const char *s;
+	while (s = *argv++) {
+		if (strcmp(s, method->name_and_sig_as_C_string()) == 0)
+			break;
+	}
+	if (!s) {
+		method->set_not_compilable();
+		return 0;
+	}
+  }
+#endif
+
+  // Compiler errors longjmp back here: roll back the code-buffer heap
+  // pointer and either mark the method uncompilable or disable the
+  // compiler entirely, depending on the error code.
+  // NOTE: the assignment inside the condition is intentional.
+  saved_hp = cb->hp;
+  if (rc = setjmp(compiler_error_env)) {
+    cb->hp = saved_hp;
+    if (rc == COMPILER_RESULT_FAILED)
+        method->set_not_compilable();
+    if (rc == COMPILER_RESULT_FATAL)
+	UseCompiler = false;
+    compiling = 0;
+    return 0;
+  }
+
+  // Only one thread may compile at a time; losers simply keep interpreting.
+  if (cmpxchg_ptr(1, &compiling, 0)) return 0;
+
+#ifdef T2_PRINT_STATISTICS
+  clock_t compile_time = clock();
+#endif
+
+#ifdef T2_PRINT_COMPILATION
+  if (PrintCompilation || PrintAssembly) {
+    fprintf(stderr, "Compiling %d %c%c %s\n",
+	compiled_methods,
+	method->is_synchronized() ? 'S' : ' ',
+	method->has_exception_handler() ? 'E' : ' ',
+	method->name_and_sig_as_C_string());
+  }
+#endif
+
+  // Reset the per-compilation scratch tables (file-scope buffers).
+  memset(bc_stackinfo, 0, code_size * sizeof(unsigned));
+  memset(locals_info, 0, method->max_locals() * sizeof(unsigned));
+#ifdef T2_PRINT_DISASS
+  memset(start_bci, 0xff, sizeof(start_bci));
+  memset(end_bci, 0xff, sizeof(end_bci));
+#endif
+
+#ifdef THUMB2_JVMTI
+  address_bci_map_reset(thread);
+#endif // THUMB2_JVMTI
+
+  // Populate the compilation context shared by all passes.
+  jinfo_str.thread = thread;
+  jinfo_str.method = method;
+  jinfo_str.code_base = code_base;
+  jinfo_str.code_size = code_size;
+  jinfo_str.bc_stackinfo = bc_stackinfo;
+  jinfo_str.locals_info = locals_info;
+  jinfo_str.compiled_return = 0;
+  for (int i = 0; i < 12; i++) jinfo_str.compiled_word_return[i] = 0;
+  jinfo_str.is_leaf = 1;
+  // use_istate must be enabled for DebugSwitch because various of the
+  // Debug_ methods use it. However it may change the code generation.
+  jinfo_str.use_istate = DebugSwitch || method->has_monitor_bytecodes();
+
+  Thumb2_local_info_from_sig(&jinfo_str, method, base);
+
+  // Pass 1: analyse the bytecode (stack depths, branch targets, locals use).
+  Thumb2_pass1(&jinfo_str, 0, 0);
+
+  codebuf_str.codebuf = (unsigned short *)cb->hp;
+  codebuf_str.idx = 0;
+  codebuf_str.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
+
+  jstack_str.stack = stack;
+  jstack_str.depth = 0;
+
+  memset(r_local, 0, method->max_locals() * sizeof(unsigned));
+
+  jregs_str.r_local = r_local;
+
+  jinfo_str.codebuf = &codebuf_str;
+  jinfo_str.jstack = &jstack_str;
+  jinfo_str.jregs = &jregs_str;
+
+  jregs_str.pregs[0] = JAZ_V1;
+  jregs_str.pregs[1] = JAZ_V2;
+  jregs_str.pregs[2] = JAZ_V3;
+  jregs_str.pregs[3] = JAZ_V4;
+  jregs_str.pregs[4] = JAZ_V5;
+  jregs_str.pregs[5] = JAZ_V6;
+
+  jregs_str.npregs = PREGS;
+
+  // Allocate ARM callee-saved registers to the hottest locals.
+  Thumb2_RegAlloc(&jinfo_str);
+
+  // The Compiled_Method header lives at the (aligned) start of the code;
+  // slow_entry gets the Thumb bit when generating Thumb-2 code.
+  slow_entry = out_align_offset(&codebuf_str, CODE_ALIGN, SLOW_ENTRY_OFFSET);
+  cmethod = (Compiled_Method *)slow_entry;
+  if (Thumb2) slow_entry |= TBIT;
+
+  // Commit the header and restart the code buffer after it.
+  cb->hp += codebuf_str.idx * 2;
+  codebuf_str.codebuf = (unsigned short *)cb->hp;
+  codebuf_str.idx = 0;
+  codebuf_str.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
+
+  // Accessor methods get a specialised fast path; everything else goes
+  // through the full code generator.
+  compiled_accessor = 1;
+  if (!method->is_accessor() || !Thumb2_Accessor(&jinfo_str)) {
+    Thumb2_Enter(&jinfo_str);
+    Thumb2_codegen(&jinfo_str, 0);
+    compiled_accessor = 0;
+  }
+
+#ifdef T2_PRINT_DISASS
+  if (PrintAssembly) {
+#ifdef DISASS_AFTER
+    if (compiled_methods >= DISASS_AFTER) {
+      Thumb2_disass(&jinfo_str);
+    }
+#else
+    Thumb2_disass(&jinfo_str);
+#endif
+  }
+#endif
+
+  // Record the register-to-local mapping so the deoptimiser / exception
+  // unwinder (Thumb2_lr_to_bci) can flush registers back to locals.
+  for (int i = 0; i < PREGS; i++)
+    cmethod->regusage[i] = jregs_str.mapping[i];
+
+  Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf_str.idx * 2);
+
+#ifdef T2_PRINT_STATISTICS
+  compile_time = clock() - compile_time;
+  total_compile_time += compile_time;
+
+  if (t2_print_statistics) {
+    unsigned codegen = codebuf_str.idx * 2;
+    bytecodes_compiled += code_size;
+    arm_code_generated += codegen;
+    fprintf(stderr, "%d bytecodes => %d bytes code in %.2f sec, totals: %d => %d in %.2f sec\n",
+      code_size, codegen, (double)compile_time/(double)CLOCKS_PER_SEC,
+    bytecodes_compiled, arm_code_generated, (double)total_compile_time/(double)CLOCKS_PER_SEC);
+  }
+#endif
+
+  // The code handle (what from_interpreted_entry points at) is a word
+  // holding slow_entry, followed by the OSR/exception tables.
+  code_handle = out_align(&codebuf_str, sizeof(address));
+
+  out_32(&codebuf_str, slow_entry);
+
+  if (!compiled_accessor)
+    Thumb2_tablegen(cmethod, &jinfo_str);
+
+  cb->hp += codebuf_str.idx * 2;
+
+  //if (!Thumb2) longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
+
+  // Link onto the global compiled-method list and install the entrypoint.
+  *compiled_method_list_tail_ptr = cmethod;
+  compiled_method_list_tail_ptr = &(cmethod->next);
+
+  Thumb2_Install(method, code_handle);
+
+  compiled_methods++;
+
+  // Release the compilation latch before computing the return value.
+  compiling = 0;
+
+  compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
+  if (compiled_offset == 0) return 0;
+  thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
+  thumb_entry.regusage = cmethod->regusage;
+
+#ifdef THUMB2_JVMTI
+  {
+    // we need to dispatch a compiled_method_load event
+    // to all registered Jvmti agents
+
+    // notify the whole generated code region for this Java method
+    // from slow_entry through to the end of the osr table. some
+    // of it is data not code but that's not a problem.
+
+    const void *gen_code_start = (const void *)(slow_entry & ~TBIT);
+    unsigned gen_code_size = codebuf_str.idx * 2;
+
+    // address_bci_map translates start addresses for generated code
+    // sections to bytecode indices and contains address_bci_map_length
+    // entries
+
+    // the final compile_info argument is supposed to contain
+    // information about inlined code. we can supply NULL for now -
+    // oprofile doesn't use it anyway
+
+    void *compile_info = NULL;
+
+    // transition from in Java to in VM before calling into Jvmti
+    ThreadInVMfromJava transition(thread);
+
+    JvmtiExport::post_compiled_method_load(method, gen_code_size,
+		gen_code_start, address_bci_map_length,
+		address_bci_map, NULL);
+  }
+#endif // THUMB2_JVMTI
+
+  return *(unsigned long long *)&thumb_entry;
+}
+
+extern "C" void Thumb2_DivZero_Handler(void);
+extern "C" void Thumb2_ArrayBounds_Handler(void);
+extern "C" void Thumb2_Handle_Exception(void);
+extern "C" void Thumb2_Handle_Exception_NoRegs(void);
+extern "C" void Thumb2_Exit_To_Interpreter(void);
+extern "C" void Thumb2_Stack_Overflow(void);
+extern "C" void Thumb2_monitorenter(void);
+
+extern "C" void __divsi3(void);
+extern "C" void __aeabi_ldivmod(void);
+extern "C" void __aeabi_i2f(void);
+extern "C" void __aeabi_i2d(void);
+extern "C" void __aeabi_l2f(void);
+extern "C" void __aeabi_l2d(void);
+extern "C" void __aeabi_f2d(void);
+extern "C" void __aeabi_d2f(void);
+extern "C" void Helper_new(void);
+extern "C" void Helper_instanceof(void);
+extern "C" void Helper_checkcast(void);
+extern "C" void Helper_monitorexit(void);
+extern "C" void Helper_aastore(void);
+extern "C" void Helper_aputfield(void);
+extern "C" void Helper_synchronized_enter(void);
+extern "C" void Helper_synchronized_exit(void);
+// Mangled C++ runtime entry points called directly from generated code.
+// The mangled names are spelled out because the generated stubs take the
+// raw symbol address (see the mov_imm(..., (u32)_ZN...) uses below).
+extern "C" void Helper_SafePoint(void);
+
+extern "C" void _ZN13SharedRuntime3f2iEf(void);
+extern "C" void _ZN13SharedRuntime3f2lEf(void);
+extern "C" void _ZN13SharedRuntime3d2iEd(void);
+extern "C" void _ZN13SharedRuntime3d2lEd(void);
+extern "C" void _ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei(void);
+extern "C" void _ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii(void);
+extern "C" void _ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi(void);
+extern "C" void _ZN18InterpreterRuntime3ldcEP10JavaThreadb(void);
+
+// Pre-assembled stub blob: Thumb2_stubs..Thumb2_stubs_end is copied
+// wholesale into the code buffer at init time; the individual labels
+// below mark each stub's position inside that blob.
+extern char Thumb2_stubs[];
+extern char Thumb2_stubs_end[];
+extern char Thumb2_idiv_stub[];
+extern char Thumb2_irem_stub[];
+extern char Thumb2_invokeinterface_stub[];
+extern char Thumb2_invokevirtual_stub[];
+extern char Thumb2_invokestatic_stub[];
+extern char Thumb2_invokespecial_stub[];
+extern char Thumb2_getfield_word_stub[];
+extern char Thumb2_getfield_sh_stub[];
+extern char Thumb2_getfield_h_stub[];
+extern char Thumb2_getfield_sb_stub[];
+extern char Thumb2_getfield_dw_stub[];
+extern char Thumb2_putfield_word_stub[];
+extern char Thumb2_putfield_h_stub[];
+extern char Thumb2_putfield_b_stub[];
+extern char Thumb2_putfield_a_stub[];
+extern char Thumb2_putfield_dw_stub[];
+extern char Thumb2_getstatic_word_stub[];
+extern char Thumb2_getstatic_sh_stub[];
+extern char Thumb2_getstatic_h_stub[];
+extern char Thumb2_getstatic_sb_stub[];
+extern char Thumb2_getstatic_dw_stub[];
+extern char Thumb2_putstatic_word_stub[];
+extern char Thumb2_putstatic_h_stub[];
+extern char Thumb2_putstatic_b_stub[];
+extern char Thumb2_putstatic_a_stub[];
+extern char Thumb2_putstatic_dw_stub[];
+
+extern char Thumb2_invokestaticresolved_stub[];
+extern char Thumb2_invokespecialresolved_stub[];
+extern char Thumb2_invokevirtualresolved_stub[];
+extern char Thumb2_invokevfinalresolved_stub[];
+
+// Byte offsets of each stub within the blob; added to the copied blob's
+// base address (cb->hp) when the handlers[] table is filled in.
+#define STUBS_SIZE	(Thumb2_stubs_end-Thumb2_stubs)
+#define IDIV_STUB		(Thumb2_idiv_stub-Thumb2_stubs)
+#define IREM_STUB		(Thumb2_irem_stub-Thumb2_stubs)
+#define INVOKEINTERFACE_STUB	(Thumb2_invokeinterface_stub-Thumb2_stubs)
+#define INVOKEVIRTUAL_STUB	(Thumb2_invokevirtual_stub-Thumb2_stubs)
+#define INVOKESTATIC_STUB	(Thumb2_invokestatic_stub-Thumb2_stubs)
+#define INVOKESPECIAL_STUB	(Thumb2_invokespecial_stub-Thumb2_stubs)
+#define GETFIELD_WORD_STUB	(Thumb2_getfield_word_stub-Thumb2_stubs)
+#define GETFIELD_SH_STUB	(Thumb2_getfield_sh_stub-Thumb2_stubs)
+#define GETFIELD_H_STUB		(Thumb2_getfield_h_stub-Thumb2_stubs)
+#define GETFIELD_SB_STUB	(Thumb2_getfield_sb_stub-Thumb2_stubs)
+#define GETFIELD_DW_STUB	(Thumb2_getfield_dw_stub-Thumb2_stubs)
+#define PUTFIELD_WORD_STUB	(Thumb2_putfield_word_stub-Thumb2_stubs)
+#define PUTFIELD_H_STUB		(Thumb2_putfield_h_stub-Thumb2_stubs)
+#define PUTFIELD_B_STUB		(Thumb2_putfield_b_stub-Thumb2_stubs)
+#define PUTFIELD_A_STUB		(Thumb2_putfield_a_stub-Thumb2_stubs)
+#define PUTFIELD_DW_STUB	(Thumb2_putfield_dw_stub-Thumb2_stubs)
+#define GETSTATIC_WORD_STUB	(Thumb2_getstatic_word_stub-Thumb2_stubs)
+#define GETSTATIC_SH_STUB	(Thumb2_getstatic_sh_stub-Thumb2_stubs)
+#define GETSTATIC_H_STUB	(Thumb2_getstatic_h_stub-Thumb2_stubs)
+#define GETSTATIC_SB_STUB	(Thumb2_getstatic_sb_stub-Thumb2_stubs)
+#define GETSTATIC_DW_STUB	(Thumb2_getstatic_dw_stub-Thumb2_stubs)
+#define PUTSTATIC_WORD_STUB	(Thumb2_putstatic_word_stub-Thumb2_stubs)
+#define PUTSTATIC_H_STUB	(Thumb2_putstatic_h_stub-Thumb2_stubs)
+#define PUTSTATIC_B_STUB	(Thumb2_putstatic_b_stub-Thumb2_stubs)
+#define PUTSTATIC_A_STUB	(Thumb2_putstatic_a_stub-Thumb2_stubs)
+#define PUTSTATIC_DW_STUB	(Thumb2_putstatic_dw_stub-Thumb2_stubs)
+
+#define INVOKESTATIC_RESOLVED_STUB (Thumb2_invokestaticresolved_stub-Thumb2_stubs)
+#define INVOKESPECIAL_RESOLVED_STUB (Thumb2_invokespecialresolved_stub-Thumb2_stubs)
+#define INVOKEVIRTUAL_RESOLVED_STUB (Thumb2_invokevirtualresolved_stub-Thumb2_stubs)
+#define INVOKEVFINAL_RESOLVED_STUB (Thumb2_invokevfinalresolved_stub-Thumb2_stubs)
+
+extern "C" void Thumb2_NullPtr_Handler(void);
+
+
+// Null-pointer fault filter, presumably invoked from the SEGV signal
+// handler (regs looks like a saved register frame — TODO confirm caller).
+// If the faulting pc lies inside JIT-compiled code, redirect execution to
+// Thumb2_NullPtr_Handler with lr = faulting pc so the handler can locate
+// the faulting site.  Returns 1 if the fault was claimed, 0 to let the
+// caller fall back to default handling.
+extern "C" int Thumb2_Check_Null(unsigned *regs, unsigned pc)
+{
+  Thumb2_CodeBuf *cb = thumb2_codebuf;
+  // Ignore if < ARMv6
+  if (!ARCH_GE_V6(CPUInfo)) return 0;
+  if (IS_COMPILED(pc, cb)) {
+    regs[ARM_LR] = pc;
+    regs[ARM_PC] = (unsigned)Thumb2_NullPtr_Handler;
+    regs[ARM_CPSR] &= ~CPSR_THUMB_BIT;  // handler entered in ARM, not Thumb, state
+    return 1;
+  }
+  return 0;
+}
+
+// One-time JIT initialisation.  mmaps an executable code buffer, copies
+// the pre-assembled stub blob into it, fills in the handlers[] table with
+// the address of each stub, then assembles the remaining glue handlers
+// (divide/remainder, float conversions, allocation, monitors, safepoint)
+// directly into the buffer.  On any failure the compiler is disabled
+// (UseCompiler = false) and the interpreter runs unaided.
+extern "C" void Thumb2_Initialize(void)
+{
+  CodeBuf codebuf;
+  Thumb2_CodeBuf *cb;
+  u32 loc_ldiv;
+  int rc;
+
+  // Only support arch >= 6 for the moment
+  if (!ARCH_GE_V6(CPUInfo)) {
+    UseCompiler = false;
+    return;
+  }
+
+#ifdef T2_PRINT_COMPILATION
+  PrintCompilation |= getenv("T2_PRINT_COMPILATION") != NULL;
+#endif
+#ifdef T2_PRINT_STATISTICS
+  t2_print_statistics = getenv("T2_PRINT_STATISTICS");
+#endif
+#ifdef T2_PRINT_DISASS
+  PrintAssembly |= getenv("T2_PRINT_DISASS") != NULL;
+#endif
+#ifdef T2_PRINT_REGUSAGE
+  t2_print_regusage = getenv("T2_PRINT_REGUSAGE");
+#endif
+#ifdef COMPILE_COUNT
+    char *tmp = getenv("T2_COMPILE_COUNT");
+    if (tmp) CompileCount = atol(tmp);
+#endif
+#ifdef DISASS_AFTER
+  char *tmp = getenv("T2_DISASS_AFTER");
+  if (tmp) DisassAfter = atol(tmp);
+#endif
+#ifdef COMPILE_ONLY
+  T2CompileOnly = getenv("T2_COMPILE_ONLY");
+#endif
+  DebugSwitch = getenv("T2_DEBUG");
+
+  // Environment overrides for the target instruction set; Thumb is
+  // forced off when the CPU lacks Thumb-2 support.
+  if (getenv("T2_COMPILE_ARM") != NULL) Thumb2 = 0;
+  if (getenv("T2_COMPILE_THUMB") != NULL) Thumb2 = 1;
+  if (!(CPUInfo & ARCH_THUMB2)) Thumb2 = 0;
+
+  //printf("Compiling for %s\n", Thumb2 ? "Thumb" : "ARM");
+
+  cb = (Thumb2_CodeBuf *)mmap(0, THUMB2_CODEBUF_SIZE, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+  if (cb == MAP_FAILED) {
+    UseCompiler = false;
+    return;
+  }
+
+  // hp grows upwards from just past the header, sp downwards from the end.
+  cb->size = THUMB2_CODEBUF_SIZE;
+  cb->hp = (char *)cb + sizeof(Thumb2_CodeBuf);
+  cb->sp = (char *)cb + THUMB2_CODEBUF_SIZE;
+
+  codebuf.codebuf = (unsigned short *)cb->hp;
+  codebuf.idx = 0;
+  codebuf.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
+
+  // Any compiler error raised while emitting the handlers longjmps back
+  // here; in that case just disable the compiler.
+  if ((rc = setjmp(compiler_error_env))) {
+    UseCompiler = false;
+    return;
+  }
+
+#ifdef THUMB2_JVMTI
+  // cache the start of the generated stub region for notification later
+  stub_gen_code_start = cb->hp;
+#endif // THUMB2_JVMTI
+
+  memcpy(cb->hp, Thumb2_stubs, STUBS_SIZE);
+
+  // fprintf(stderr, "Thumb2_stubs offset: 0x%x\n",
+  // 	  (char*)(cb->hp) - (char*)Thumb2_stubs);
+
+  handlers[H_IDIV] = (unsigned)(cb->hp + IDIV_STUB);
+  handlers[H_IREM] = (unsigned)(cb->hp + IREM_STUB);
+  handlers[H_INVOKEINTERFACE] = (unsigned)(cb->hp + INVOKEINTERFACE_STUB);
+  handlers[H_INVOKEVIRTUAL] = (unsigned)(cb->hp + INVOKEVIRTUAL_STUB);
+  handlers[H_INVOKESTATIC] = (unsigned)(cb->hp + INVOKESTATIC_STUB);
+  handlers[H_INVOKESPECIAL] = (unsigned)(cb->hp + INVOKESPECIAL_STUB);
+
+  handlers[H_GETFIELD_WORD] = (unsigned)(cb->hp + GETFIELD_WORD_STUB);
+  handlers[H_GETFIELD_SH] = (unsigned)(cb->hp + GETFIELD_SH_STUB);
+  handlers[H_GETFIELD_H] = (unsigned)(cb->hp + GETFIELD_H_STUB);
+  handlers[H_GETFIELD_SB] = (unsigned)(cb->hp + GETFIELD_SB_STUB);
+  handlers[H_GETFIELD_DW] = (unsigned)(cb->hp + GETFIELD_DW_STUB);
+
+  // Each resolved-invoke handler slot gets its own stub.  (Previously the
+  // INVOKESPECIAL stub was stored into handlers[H_INVOKEVIRTUAL_RESOLVED]
+  // and immediately overwritten, leaving the INVOKESPECIAL_RESOLVED slot
+  // uninitialized.  Assumes H_INVOKESPECIAL_RESOLVED is declared alongside
+  // the other resolved-invoke indices — TODO confirm.)
+  handlers[H_INVOKESTATIC_RESOLVED] = (unsigned)(cb->hp + INVOKESTATIC_RESOLVED_STUB);
+  handlers[H_INVOKESPECIAL_RESOLVED] = (unsigned)(cb->hp + INVOKESPECIAL_RESOLVED_STUB);
+  handlers[H_INVOKEVIRTUAL_RESOLVED] = (unsigned)(cb->hp + INVOKEVIRTUAL_RESOLVED_STUB);
+  handlers[H_INVOKEVFINAL] = (unsigned)(cb->hp + INVOKEVFINAL_RESOLVED_STUB);
+
+  handlers[H_PUTFIELD_WORD] = (unsigned)(cb->hp + PUTFIELD_WORD_STUB);
+  handlers[H_PUTFIELD_H] = (unsigned)(cb->hp + PUTFIELD_H_STUB);
+  handlers[H_PUTFIELD_B] = (unsigned)(cb->hp + PUTFIELD_B_STUB);
+  handlers[H_PUTFIELD_A] = (unsigned)(cb->hp + PUTFIELD_A_STUB);
+  handlers[H_PUTFIELD_DW] = (unsigned)(cb->hp + PUTFIELD_DW_STUB);
+
+  handlers[H_GETSTATIC_WORD] = (unsigned)(cb->hp + GETSTATIC_WORD_STUB);
+  handlers[H_GETSTATIC_SH] = (unsigned)(cb->hp + GETSTATIC_SH_STUB);
+  handlers[H_GETSTATIC_H] = (unsigned)(cb->hp + GETSTATIC_H_STUB);
+  handlers[H_GETSTATIC_SB] = (unsigned)(cb->hp + GETSTATIC_SB_STUB);
+  handlers[H_GETSTATIC_DW] = (unsigned)(cb->hp + GETSTATIC_DW_STUB);
+
+  handlers[H_PUTSTATIC_WORD] = (unsigned)(cb->hp + PUTSTATIC_WORD_STUB);
+  handlers[H_PUTSTATIC_H] = (unsigned)(cb->hp + PUTSTATIC_H_STUB);
+  handlers[H_PUTSTATIC_B] = (unsigned)(cb->hp + PUTSTATIC_B_STUB);
+  handlers[H_PUTSTATIC_A] = (unsigned)(cb->hp + PUTSTATIC_A_STUB);
+  handlers[H_PUTSTATIC_DW] = (unsigned)(cb->hp + PUTSTATIC_DW_STUB);
+
+  // Account for the copied blob (codebuf counts 16-bit units).
+  codebuf.idx += (Thumb2_stubs_end-Thumb2_stubs) >> 1;
+
+  // Disassemble the codebuf we just created.  For debugging.  This
+  // first part is all ARM code; the part that we're about to create
+  // is Thumb code.
+#ifdef T2_PRINT_DISASS
+  if (PrintAssembly) {
+    Hsdis hsdis;
+    hsdis.decode_instructions(cb->hp, cb->hp + codebuf.idx * 2,
+			      print_address, NULL, NULL, stderr,
+			      "");
+    fputc('\n', stderr);
+  }
+#endif
+
+  char *begin_thumb_code = cb->hp + codebuf.idx * 2;
+
+  // ldiv/lrem share one entry: if the 64-bit divisor (r2:r3) is zero,
+  // branch to the divide-by-zero handler, else tail-call __aeabi_ldivmod.
+  handlers[H_LDIV] = handlers[H_LREM] = out_pos(&codebuf);
+  dop_reg(&codebuf, DP_ORR, ARM_IP, ARM_R2, ARM_R3, 0, 0);
+  loc_ldiv = forward_short(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_ldivmod);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+  bcc_patch(&codebuf, COND_EQ, loc_ldiv);
+  mov_imm(&codebuf, ARM_IP, (u32)Thumb2_DivZero_Handler);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_ARRAYBOUND] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_ArrayBounds_Handler);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  handlers[H_HANDLE_EXCEPTION] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  handlers[H_HANDLE_EXCEPTION_NO_REGS] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception_NoRegs);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  handlers[H_STACK_OVERFLOW] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Stack_Overflow);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  // drem/frem call libm fmod/fmodf; with the hard-float ABI the operands
+  // must be moved from the integer argument registers into VFP registers.
+  handlers[H_DREM] = out_pos(&codebuf);
+  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_imm(&codebuf, ARM_IP, (u32)fmod);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_d_toVFP(&codebuf, VFP_D0, ARM_R0, ARM_R1);
+  vmov_reg_d_toVFP(&codebuf, VFP_D1, ARM_R2, ARM_R3);
+#endif
+  blx_reg(&codebuf, ARM_IP);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_d_toARM(&codebuf, ARM_R0, ARM_R1, VFP_D0);
+#endif
+  ldm(&codebuf, (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+  handlers[H_FREM] = out_pos(&codebuf);
+  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_imm(&codebuf, ARM_R3, (u32)fmodf);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
+  vmov_reg_s_toVFP(&codebuf, VFP_S1, ARM_R1);
+#endif
+  blx_reg(&codebuf, ARM_R3);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toARM(&codebuf, ARM_R0, VFP_S0);
+#endif
+  ldm(&codebuf, (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+  // Integer/float conversion handlers tail-call the EABI helpers.
+  handlers[H_I2F] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_i2f);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_I2D] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_i2d);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_L2F] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_l2f);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_L2D] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_l2d);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_F2I] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3f2iEf);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_F2L] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3f2lEf);
+#ifdef __ARM_PCS_VFP
+  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_F2D] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_f2d);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_D2I] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3d2iEd);
+#ifdef __ARM_PCS_VFP
+  // NOTE(review): passes VFP_S0 for a double operand where H_DREM uses
+  // VFP_D0 — presumably both name register 0 so the encoding agrees;
+  // confirm against the VFP_S0/VFP_D0 definitions.
+  vmov_reg_d_toVFP(&codebuf, VFP_S0, ARM_R0, ARM_R1);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_D2L] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3d2lEd);
+#ifdef __ARM_PCS_VFP
+  // NOTE(review): same VFP_S0-for-double question as H_D2I above.
+  vmov_reg_d_toVFP(&codebuf, VFP_S0, ARM_R0, ARM_R1);
+#endif
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+  handlers[H_D2F] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_d2f);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// NEW Stub
+//   r1 = index
+//   r3 = bci
+//   result -> R0, == 0 => exception
+  handlers[H_NEW] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_METHOD);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_new);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R3);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  str_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_BCP);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// NEWARRAY Stub
+//   r1 = atype
+//   r2 = tos
+//   r3 = bci
+//   result -> thread->vm_result
+  handlers[H_NEWARRAY] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_R0, ARM_R3);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// ANEWARRAY Stub
+//   r0 = bci
+//   r2 = index
+//   r3 = tos
+//   result -> thread->vm_result
+  handlers[H_ANEWARRAY] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+
+  sub_imm(&codebuf, ARM_R1, Rstack, 4);
+  str_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTANTS);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// MULTIANEWARRAY Stub
+//   r0 = bci
+//   r1 = dimensions (*4)
+  handlers[H_MULTIANEWARRAY] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  add_reg(&codebuf, Rstack, Rstack, ARM_R1);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  sub_imm(&codebuf, ARM_R1, Rstack, 4);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// LDC Stub
+//   r0 = bci
+  handlers[H_LDC] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_imm(&codebuf, ARM_R1, 0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// LDC_W Stub
+//   r0 = bci
+  handlers[H_LDC_W] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_imm(&codebuf, ARM_R1, 1);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// INSTANCEOF Stub
+//   r1 = index
+//   r2 = tos
+//   r3 = bci
+//   result -> R0, == -1 => exception
+  handlers[H_INSTANCEOF] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_instanceof);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// CHECKCAST Stub
+//   r1 = index
+//   r2 = tos
+//   r3 = bci
+//   result -> R0, != 0 => exception
+  handlers[H_CHECKCAST] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_checkcast);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// MONITORENTER
+//   r0 = bci
+  handlers[H_MONITORENTER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_monitorenter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// MONITOREXIT Stub
+//   r1 = tos
+//   r3 = bci
+//   result -> R0, != 0 => exception
+  handlers[H_MONITOREXIT] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_monitorexit);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// AASTORE Stub
+//   r0 = bci
+//   r1 = value
+//   r2 = index
+//   r3 = arrayref
+  handlers[H_AASTORE] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_BCP);
+  sub_imm(&codebuf, ARM_IP, Rstack, 4);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_STACK);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_aastore);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// APUTFIELD Stub
+//   r0 = obj
+  handlers[H_APUTFIELD] = out_pos(&codebuf);
+  mov_imm(&codebuf, ARM_R3, (u32)Helper_aputfield);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// SYNCHRONIZED_ENTER Stub
+//   r0 = bci
+//   Rstack = monitor
+  handlers[H_SYNCHRONIZED_ENTER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R0);
+  str_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_BCP-ISTATE_NEXT_FRAME);
+
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R1, ISTATE_STACK-ISTATE_NEXT_FRAME);
+
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_enter);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_R1, Rstack);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+//
+// SYNCHRONIZED_EXIT Stub
+//   r0 = bci
+//   r1 = monitor
+  handlers[H_SYNCHRONIZED_EXIT] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME);
+
+  ldr_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK-ISTATE_NEXT_FRAME);
+  str_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_BCP-ISTATE_NEXT_FRAME);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_exit);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
+
+// DEBUG_METHDENTRY
+  handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
+  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(&codebuf, ARM_R2, ARM_R0);
+  mov_reg(&codebuf, ARM_R0, ARM_R8);
+  mov_reg(&codebuf, ARM_R1, ARM_R4);
+  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodEntry);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+// DEBUG_METHODEXIT
+  handlers[H_DEBUG_METHODEXIT] = out_pos(&codebuf);
+  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(&codebuf, ARM_R0, ARM_R8);
+  mov_reg(&codebuf, ARM_R1, ARM_R4);
+  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodExit);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+// DEBUG_METHODCALL
+  handlers[H_DEBUG_METHODCALL] = out_pos(&codebuf);
+  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(&codebuf, ARM_R2, ARM_R0);
+  mov_reg(&codebuf, ARM_R0, ARM_R8);
+  mov_reg(&codebuf, ARM_R1, ARM_R4);
+  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodCall);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
+
+// EXIT_TO_INTERPRETER
+//   r0 = bci
+  handlers[H_EXIT_TO_INTERPRETER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Exit_To_Interpreter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// H_SAFEPOINT
+  handlers[H_SAFEPOINT] = out_pos(&codebuf);
+  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+
+  // The frame walking code used by the garbage collector
+  // (frame::interpreter_frame_tos_address()) assumes that the stack
+  // pointer points one word below the top item on the stack, so we
+  // have to adjust the SP saved in istate accordingly.  If we don't,
+  // the value on TOS won't be seen by the GC and we will crash later.
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK);
+
+  // Set up BytecodeInterpreter->_bcp for the GC
+  // bci+CONSTMETHOD_CODEOFFSET is passed in ARM_R1
+  // istate is passed in ARM_R2
+  ldr_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_METHOD);
+  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD);
+  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_BCP);
+
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_SafePoint);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  blx_reg(&codebuf, ARM_IP);
+  ldm(&codebuf, (1<<ARM_LR), ARM_SP, POP_FD, 1);
+  cmp_imm(&codebuf, ARM_R0, 0);
+
+  // The sequence here is delicate.  We need to set things up so that
+  // it looks as though Thumb2_Handle_Exception_NoRegs was called
+  // directly from a compiled method.
+  mov_reg(&codebuf, ARM_PC, ARM_LR, COND_EQ);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception_NoRegs);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+  // Disassemble the codebuf we just created.  For debugging
+#ifdef T2_PRINT_DISASS
+  if (PrintAssembly) {
+    Hsdis hsdis;
+    hsdis.decode_instructions(begin_thumb_code, cb->hp + codebuf.idx * 2,
+			      print_address, NULL, NULL, stderr,
+			      Thumb2 ? "force-thumb" : "");
+    fputc('\n', stderr);
+  }
+#endif
+
+  // Make the freshly written instructions visible to the I-cache, then
+  // bump the heap pointer past the generated handlers.
+  Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf.idx * 2);
+  cb->hp += codebuf.idx * 2;
+
+  thumb2_codebuf = cb;
+
+#ifdef THUMB2_JVMTI
+  // cache the end of the generated stub region for notification later
+  stub_gen_code_end = cb->hp;
+#endif // THUMB2_JVMTI
+}
+
+#endif // T2JIT
+
+#endif // __arm__
--- a/src/cpu/zero/vm/thumb2.cpp	Wed Apr 16 16:09:05 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7985 +0,0 @@
-/*
- * Copyright 2009, 2010 Edward Nevill
- * Copyright 2012, 2013 Red Hat
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifdef __arm__
-
-#undef T2JIT
-#if !defined(DISABLE_THUMB2) && defined(HOTSPOT_ASM) && !defined(SHARK)
-#define T2JIT
-#endif
-
-#ifdef T2JIT
-
-// setting DISABLE_THUMB2_JVMTI at build time disables notification
-// of JVMTI dynamic_generate and compiled_method_load events
-#undef THUMB2_JVMTI
-#if !defined(DISABLE_THUMB2_JVMTI)
-#define THUMB2_JVMTI
-#endif
-
-#define T2_PRINT_COMPILATION
-#define T2_PRINT_STATISTICS
-#define T2_PRINT_DISASS
-#define T2_PRINT_REGUSAGE
-
-#define T2EE_PRINT_REGUSAGE
-#define CODE_ALIGN 64
-
-#define SLOW_ENTRY_OFFSET 24
-#define FAST_ENTRY_OFFSET 40
-
-#ifdef T2_PRINT_STATISTICS
-static char *t2_print_statistics;
-#endif
-
-#ifdef T2_PRINT_REGUSAGE
-static char *t2_print_regusage;
-#endif
-
-static char *t2_ospace;
-#define OSPACE t2_ospace
-
-#ifdef PRODUCT
-#define THUMB2_CODEBUF_SIZE (8 * 1024 * 1024)
-#else
-#define THUMB2_CODEBUF_SIZE (4 * 1024 * 1024)
-#endif
-#define THUMB2_MAX_BYTECODE_SIZE 10000
-#define THUMB2_MAX_T2CODE_SIZE 65000
-#define THUMB2_MAXLOCALS 1000
-
-#include <sys/mman.h>
-#include <ucontext.h>
-#include "precompiled.hpp"
-#include "interpreter/bytecodes.hpp"
-#include "compiler/compilerOracle.hpp"
-
-#define opc_nop			0x00
-#define opc_aconst_null		0x01
-#define opc_iconst_m1		0x02
-#define opc_iconst_0		0x03
-#define opc_iconst_1		0x04
-#define opc_iconst_2		0x05
-#define opc_iconst_3		0x06
-#define opc_iconst_4		0x07
-#define opc_iconst_5		0x08
-#define opc_lconst_0		0x09
-#define opc_lconst_1		0x0a
-#define opc_fconst_0		0x0b
-#define opc_fconst_1		0x0c
-#define opc_fconst_2		0x0d
-#define opc_dconst_0		0x0e
-#define opc_dconst_1		0x0f
-#define opc_bipush		0x10
-#define opc_sipush		0x11
-#define opc_ldc			0x12
-#define opc_ldc_w		0x13
-#define opc_ldc2_w		0x14
-#define opc_iload		0x15
-#define opc_lload		0x16
-#define opc_fload		0x17
-#define opc_dload		0x18
-#define opc_aload		0x19
-#define opc_iload_0		0x1a
-#define opc_iload_1		0x1b
-#define opc_iload_2		0x1c
-#define opc_iload_3		0x1d
-#define opc_lload_0		0x1e
-#define opc_lload_1		0x1f
-#define opc_lload_2		0x20
-#define opc_lload_3		0x21
-#define opc_fload_0		0x22
-#define opc_fload_1		0x23
-#define opc_fload_2		0x24
-#define opc_fload_3		0x25
-#define opc_dload_0		0x26
-#define opc_dload_1		0x27
-#define opc_dload_2		0x28
-#define opc_dload_3		0x29
-#define opc_aload_0		0x2a
-#define opc_aload_1		0x2b
-#define opc_aload_2		0x2c
-#define opc_aload_3		0x2d
-#define opc_iaload		0x2e
-#define opc_laload		0x2f
-#define opc_faload		0x30
-#define opc_daload		0x31
-#define opc_aaload		0x32
-#define opc_baload		0x33
-#define opc_caload		0x34
-#define opc_saload		0x35
-#define opc_istore		0x36
-#define opc_lstore		0x37
-#define opc_fstore		0x38
-#define opc_dstore		0x39
-#define opc_astore		0x3a
-#define opc_istore_0		0x3b
-#define opc_istore_1		0x3c
-#define opc_istore_2		0x3d
-#define opc_istore_3		0x3e
-#define opc_lstore_0		0x3f
-#define opc_lstore_1		0x40
-#define opc_lstore_2		0x41
-#define opc_lstore_3		0x42
-#define opc_fstore_0		0x43
-#define opc_fstore_1		0x44
-#define opc_fstore_2		0x45
-#define opc_fstore_3		0x46
-#define opc_dstore_0		0x47
-#define opc_dstore_1		0x48
-#define opc_dstore_2		0x49
-#define opc_dstore_3		0x4a
-#define opc_astore_0		0x4b
-#define opc_astore_1		0x4c
-#define opc_astore_2		0x4d
-#define opc_astore_3		0x4e
-#define opc_iastore		0x4f
-#define opc_lastore		0x50
-#define opc_fastore		0x51
-#define opc_dastore		0x52
-#define opc_aastore		0x53
-#define opc_bastore		0x54
-#define opc_castore		0x55
-#define opc_sastore		0x56
-#define opc_pop			0x57
-#define opc_pop2		0x58
-#define opc_dup			0x59
-#define opc_dup_x1		0x5a
-#define opc_dup_x2		0x5b
-#define opc_dup2		0x5c
-#define opc_dup2_x1		0x5d
-#define opc_dup2_x2		0x5e
-#define opc_swap		0x5f
-#define opc_iadd		0x60
-#define opc_ladd		0x61
-#define opc_fadd		0x62
-#define opc_dadd		0x63
-#define opc_isub		0x64
-#define opc_lsub		0x65
-#define opc_fsub		0x66
-#define opc_dsub		0x67
-#define opc_imul		0x68
-#define opc_lmul		0x69
-#define opc_fmul		0x6a
-#define opc_dmul		0x6b
-#define opc_idiv		0x6c
-#define opc_ldiv		0x6d
-#define opc_fdiv		0x6e
-#define opc_ddiv		0x6f
-#define opc_irem		0x70
-#define opc_lrem		0x71
-#define opc_frem		0x72
-#define opc_drem		0x73
-#define opc_ineg		0x74
-#define opc_lneg		0x75
-#define opc_fneg		0x76
-#define opc_dneg		0x77
-#define opc_ishl		0x78
-#define opc_lshl		0x79
-#define opc_ishr		0x7a
-#define opc_lshr		0x7b
-#define opc_iushr		0x7c
-#define opc_lushr		0x7d
-#define opc_iand		0x7e
-#define opc_land		0x7f
-#define opc_ior			0x80
-#define opc_lor			0x81
-#define opc_ixor		0x82
-#define opc_lxor		0x83
-#define opc_iinc		0x84
-#define opc_i2l			0x85
-#define opc_i2f			0x86
-#define opc_i2d			0x87
-#define opc_l2i			0x88
-#define opc_l2f			0x89
-#define opc_l2d			0x8a
-#define opc_f2i			0x8b
-#define opc_f2l			0x8c
-#define opc_f2d			0x8d
-#define opc_d2i			0x8e
-#define opc_d2l			0x8f
-#define opc_d2f			0x90
-#define opc_i2b			0x91
-#define opc_i2c			0x92
-#define opc_i2s			0x93
-#define opc_lcmp		0x94
-#define opc_fcmpl		0x95
-#define opc_fcmpg		0x96
-#define opc_dcmpl		0x97
-#define opc_dcmpg		0x98
-#define opc_ifeq		0x99
-#define opc_ifne		0x9a
-#define opc_iflt		0x9b
-#define opc_ifge		0x9c
-#define opc_ifgt		0x9d
-#define opc_ifle		0x9e
-#define opc_if_icmpeq		0x9f
-#define opc_if_icmpne		0xa0
-#define opc_if_icmplt		0xa1
-#define opc_if_icmpge		0xa2
-#define opc_if_icmpgt		0xa3
-#define opc_if_icmple		0xa4
-#define opc_if_acmpeq		0xa5
-#define opc_if_acmpne		0xa6
-#define opc_goto		0xa7
-#define opc_jsr			0xa8
-#define opc_ret			0xa9
-#define opc_tableswitch		0xaa
-#define opc_lookupswitch	0xab
-#define opc_ireturn		0xac
-#define opc_lreturn		0xad
-#define opc_freturn		0xae
-#define opc_dreturn		0xaf
-#define opc_areturn		0xb0
-#define opc_return		0xb1
-#define opc_getstatic		0xb2
-#define opc_putstatic		0xb3
-#define opc_getfield		0xb4
-#define opc_putfield		0xb5
-#define opc_invokevirtual	0xb6
-#define opc_invokespecial	0xb7
-#define opc_invokestatic	0xb8
-#define opc_invokeinterface	0xb9
-#define opc_invokedynamic	0xba
-#define opc_new			0xbb
-#define opc_newarray		0xbc
-#define opc_anewarray		0xbd
-#define opc_arraylength		0xbe
-#define opc_athrow		0xbf
-#define opc_checkcast		0xc0
-#define opc_instanceof		0xc1
-#define opc_monitorenter	0xc2
-#define opc_monitorexit		0xc3
-#define opc_wide		0xc4
-#define opc_multianewarray	0xc5
-#define opc_ifnull		0xc6
-#define opc_ifnonnull		0xc7
-#define opc_goto_w		0xc8
-#define opc_jsr_w		0xc9
-#define opc_breakpoint		0xca
-
-#define OPC_LAST_JAVA_OP	0xca
-
-#define opc_fast_aldc		Bytecodes::_fast_aldc
-#define opc_fast_aldc_w		Bytecodes::_fast_aldc_w
-
-#define opc_bgetfield			0xcc
-#define opc_cgetfield			0xcd
-#define opc_igetfield			0xd0
-#define opc_lgetfield			0xd1
-#define opc_sgetfield			0xd2
-#define opc_aputfield			0xd3
-#define opc_bputfield			0xd4
-#define opc_cputfield			0xd5
-#define opc_iputfield			0xd8
-#define opc_lputfield			0xd9
-#define opc_iaccess_0			0xdb
-#define opc_iaccess_1			0xdc
-#define opc_iaccess_2			0xdd
-#define opc_iaccess_3			0xde
-#define opc_invokeresolved		0xdf
-#define opc_invokespecialresolved	0xe0
-#define opc_invokestaticresolved	0xe1
-#define opc_invokevfinal		0xe2
-#define opc_iload_iload			0xe3
-
-#define opc_return_register_finalizer   0xe7
-#define opc_dmac                        0xe8
-#define opc_iload_0_iconst_N            0xe9
-#define opc_iload_1_iconst_N            0xea
-#define opc_iload_2_iconst_N            0xeb
-#define opc_iload_3_iconst_N            0xec
-#define opc_iload_iconst_N              0xed
-#define opc_iadd_istore_N               0xee
-#define opc_isub_istore_N               0xef
-#define opc_iand_istore_N               0xf0
-#define opc_ior_istore_N                0xf1
-#define opc_ixor_istore_N               0xf2
-#define opc_iadd_u4store                0xf3
-#define opc_isub_u4store                0xf4
-#define opc_iand_u4store                0xf5
-#define opc_ior_u4store                 0xf6
-#define opc_ixor_u4store                0xf7
-#define opc_iload_0_iload               0xf8
-#define opc_iload_1_iload               0xf9
-#define opc_iload_2_iload               0xfa
-#define opc_iload_3_iload               0xfb
-#define opc_iload_0_iload_N             0xfc
-#define opc_iload_1_iload_N             0xfd
-#define opc_iload_2_iload_N             0xfe
-#define opc_iload_3_iload_N             0xff
-
-
-#define H_IREM				0
-#define H_IDIV				1
-#define H_LDIV				2
-#define H_LREM				3
-#define H_FREM				4
-#define H_DREM				5
-#define	H_LDC				6
-#define H_NEW				8
-#define H_I2F				9
-#define H_I2D				10
-#define H_L2F				11
-#define H_L2D				12
-#define H_F2I				13
-#define H_F2L				14
-#define H_F2D				15
-#define H_D2I				16
-#define H_D2L				17
-#define H_D2F				18
-#define H_NEWARRAY			19
-#define H_ANEWARRAY			20
-#define H_MULTIANEWARRAY		21
-#define H_INSTANCEOF			22
-#define H_CHECKCAST			23
-#define H_AASTORE			24
-#define H_APUTFIELD			25
-#define H_SYNCHRONIZED_ENTER		26
-#define H_SYNCHRONIZED_EXIT		27
-
-#define H_EXIT_TO_INTERPRETER		28
-
-#define H_RET				H_EXIT_TO_INTERPRETER
-#define H_DEADCODE			H_EXIT_TO_INTERPRETER
-#define H_ATHROW			H_EXIT_TO_INTERPRETER
-
-#define H_HANDLE_EXCEPTION		29
-#define H_ARRAYBOUND			30
-
-#define H_LDC_W				31
-
-#define H_DEBUG_METHODENTRY		32
-#define H_DEBUG_METHODEXIT		33
-#define H_DEBUG_METHODCALL		34
-
-#define H_INVOKEINTERFACE		35
-#define H_INVOKEVIRTUAL			36
-#define H_INVOKESTATIC			37
-#define H_INVOKESPECIAL			38
-
-#define H_GETFIELD_WORD			39
-#define H_GETFIELD_SH			40
-#define H_GETFIELD_H			41
-#define H_GETFIELD_SB			42
-#define H_GETFIELD_DW			43
-
-#define H_PUTFIELD_WORD			44
-#define H_PUTFIELD_H			45
-#define H_PUTFIELD_B			46
-#define H_PUTFIELD_A			47
-#define H_PUTFIELD_DW			48
-
-#define H_GETSTATIC_WORD		49
-#define H_GETSTATIC_SH			50
-#define H_GETSTATIC_H			51
-#define H_GETSTATIC_SB			52
-#define H_GETSTATIC_DW			53
-
-#define H_PUTSTATIC_WORD		54
-#define H_PUTSTATIC_H			55
-#define H_PUTSTATIC_B			56
-#define H_PUTSTATIC_A			57
-#define H_PUTSTATIC_DW			58
-
-#define H_STACK_OVERFLOW		59
-
-#define H_HANDLE_EXCEPTION_NO_REGS	60
-
-#define H_INVOKESTATIC_RESOLVED		61
-#define H_INVOKESPECIAL_RESOLVED	62
-#define H_INVOKEVIRTUAL_RESOLVED	63
-#define H_INVOKEVFINAL			64
-
-#define H_MONITORENTER			65
-#define H_MONITOREXIT			66
-
-#define H_SAFEPOINT              	67
-
-#define H_LAST                          68  // Not used
-
-unsigned handlers[H_LAST];
-
-#define LEAF_STACK_SIZE			200
-#define STACK_SPARE			40
-
-#define COMPILER_RESULT_FAILED	1	// Failed to compiled this method
-#define COMPILER_RESULT_FATAL	2	// Fatal - dont try compile anything ever again
-
-#include <setjmp.h>
-
-static jmp_buf compiler_error_env;
-
-#define J_BogusImplementation() longjmp(compiler_error_env, COMPILER_RESULT_FAILED)
-
-#ifdef PRODUCT
-
-#define JASSERT(cond, msg)	
-#define J_Unimplemented() longjmp(compiler_error_env, COMPILER_RESULT_FATAL)
-#define JDEBUG_( _j_ )        
-
-#else
-
-#define JASSERT(cond, msg)	do { if (!(cond)) fatal(msg); } while (0)
-#define J_Unimplemented()       { report_unimplemented(__FILE__, __LINE__); BREAKPOINT; }
-#define JDEBUG_( _j_ )          _j_
-
-#endif // PRODUCT
-
-#define GET_NATIVE_U2(p)	(*(unsigned short *)(p))
-#define GET_NATIVE_U4(p)	(*(unsigned *)(p))
-
-#define GET_JAVA_S1(p)		(((signed char *)(p))[0])
-#define GET_JAVA_S2(p)  	((((signed char *)(p))[0] << 8) + (p)[1])
-#define GET_JAVA_U2(p)		(((p)[0] << 8) + (p)[1])
-#define GET_JAVA_U4(p)		(((p)[0] << 24) + ((p)[1] << 16) + ((p)[2] << 8) + (p)[3])
-
-#define BYTESEX_REVERSE(v) (((v)<<24) | (((v)<<8) & 0xff0000) | (((v)>>8) & 0xff00) | ((v)>>24))
-#define BYTESEX_REVERSE_U2(v) (((v)<<8) | ((v)>>8))
-
-// n.b. this value is chosen because it is an illegal thumb2 instruction
-#define THUMB2_POLLING_PAGE_MAGIC 0xdead
-
-typedef struct Thumb2_CodeBuf {
-  unsigned size;
-  char *sp;
-  char *hp;
-} Thumb2_CodeBuf;
-
-Thumb2_CodeBuf *thumb2_codebuf;
-
-unsigned bc_stackinfo[THUMB2_MAX_BYTECODE_SIZE];
-unsigned locals_info[1000];
-unsigned stack[1000];
-unsigned r_local[1000];
-
-#ifdef THUMB2_JVMTI
-// jvmti needs to map start address of generated code for a bytecode
-// to corresponding bytecode index so agents can correlate code address
-// ranges with bci and thence line number
-static jvmtiAddrLocationMap *address_bci_map = NULL;
-static jint address_bci_map_length = 0;
-
-static void *stub_gen_code_start = 0;
-static void *stub_gen_code_end = 0;
-
-// function used to lazily initialize the address to bci translation map
-// the first time a compiled method is generated.
-static void address_bci_map_init(JavaThread *thread)
-{
-  // the dynamic_code_generated event posted to notify generation of
-  // the stub code has to be posted lazily because generation happens
-  // in Thumb2_Initialize under bci_init and the latter is called as a
-  // side-effect of loading libjvm.o. we don't have a Java thread at
-  // that point nor, indeed, any agents to catch the notify. so the
-  // info cached by Thumb2_Initialize needs to be posted when the
-  // first compiled method load event is notified, at which point we
-  // will indeed have a current thread.
-
-  {
-    // a thread transition from in Java to in VM is required before
-    // calling into Jvmti
-
-    ThreadInVMfromJava transition(thread);
-
-    JvmtiExport::post_dynamic_code_generated("thumb2_dynamic_stubs_block",
-					   stub_gen_code_start,
-					   stub_gen_code_end);
-
-    // n.b. exiting this block reverts the thread state to in Java
-  }
-  
-
-  // the map is lazily allocated so we don't use the space unless we
-  // are actually using the JIT
-
-  // at worst we need a start address for every bytecode so
-  // the map size is limited by the compiler's bytecode limit
-  address_bci_map = new jvmtiAddrLocationMap[THUMB2_MAX_BYTECODE_SIZE];
-}
-
-// clear the address to bci translation map
-static void address_bci_map_reset(JavaThread *thread)
-{
-  // this only gets called after obtaining the compiler lock so there
-  // is no need to worry about races
-  
-  if (address_bci_map == NULL) {
-    address_bci_map_init(thread);
-  }
-
-  // this effectively clears the previous map
-
-  address_bci_map_length = 0;
-}
-
-// add an entry to the address to bci translation map
-// this will never exceed the available space
-static void address_bci_map_add(void *addr, unsigned bci)
-{
-    address_bci_map[address_bci_map_length].start_address = addr;
-    address_bci_map[address_bci_map_length].location = bci;
-    address_bci_map_length++;
-}
-#endif // THUMB2_JVMTI
-
-#ifdef T2_PRINT_DISASS
-short start_bci[THUMB2_MAX_T2CODE_SIZE];
-short end_bci[THUMB2_MAX_T2CODE_SIZE];
-#endif
-
-bool DebugSwitch = false;
-
-// XXX hardwired constants!
-#define ENTRY_FRAME             1
-#define INTERPRETER_FRAME       2
-#define SHARK_FRAME             3
-#define FAKE_STUB_FRAME         4
-
-#include "offsets_arm.s"
-
-#define BC_FLAGS_MASK		0xf0000000
-#define BC_VISITED		0x80000000
-#define BC_BRANCH_TARGET	0x40000000
-#define BC_COMPILED		0x20000000
-#define BC_BACK_TARGET		0x10000000
-
-#define IS_DEAD(x)	(((x) & BC_VISITED) == 0)
-
-#define LOCAL_MODIFIED		31
-#define LOCAL_REF		30
-#define LOCAL_DOUBLE		29
-#define LOCAL_FLOAT		28
-#define LOCAL_LONG		27
-#define LOCAL_INT		26
-#define LOCAL_ALLOCATED		25
-
-#define LOCAL_COUNT_BITS	10
-#define LOCAL_READ_POS		0
-#define LOCAL_WRITE_POS		LOCAL_COUNT_BITS
-
-#define LOCAL_READS(x)		(((x) >> LOCAL_READ_POS) & ((1<<LOCAL_COUNT_BITS)-1))
-#define LOCAL_WRITES(x)		(((x) >> LOCAL_WRITE_POS) & ((1<<LOCAL_COUNT_BITS)-1))
-#define LOCAL_SET_COUNTS(r, w)	(((r) << LOCAL_READ_POS) | (((w) << LOCAL_WRITE_POS)))
-#define LOCAL_INC_COUNT(c)	((c) < ((1<<LOCAL_COUNT_BITS)-1) ? (c)+1 : (c))
-
-#define STACK_REGS	4
-#define FP_STACK_REGS	4
-
-typedef unsigned	u32;
-typedef unsigned	Reg;
-
-#define	ARM_R0		0
-#define ARM_R1		1
-#define ARM_R2		2
-#define ARM_R3		3
-#define ARM_R4		4
-#define ARM_R5		5
-#define ARM_R6		6
-#define ARM_R7		7
-#define ARM_R8		8
-#define ARM_R9		9
-#define ARM_R10		10
-#define ARM_R11		11
-#define ARM_IP		12
-#define ARM_SP		13
-#define ARM_LR		14
-#define ARM_PC		15
-#define ARM_CPSR	16	// CPSR in sigcontext
-#define ARM_FAULT	17	// fault address in sigcontext
-
-#define CPSR_THUMB_BIT	(1<<5)
-
-#define VFP_S0		32
-#define VFP_S1		33
-#define VFP_S2		34
-#define VFP_S3		35
-#define VFP_S4		36
-#define VFP_S5		37
-#define VFP_S6		38
-#define VFP_S7		39
-
-#define VFP_D0		64
-#define VFP_D1		65
-#define VFP_D2		66
-#define VFP_D3		67
-#define VFP_D4		68
-#define VFP_D5		69
-#define VFP_D6		70
-#define VFP_D7		71
-
-#define PREGS	6
-
-#define JAZ_V1	ARM_R5
-#define JAZ_V2	ARM_R6
-#define JAZ_V3	ARM_R7
-#define JAZ_V4	ARM_R8
-#define JAZ_V5	ARM_R9
-#define JAZ_V6	ARM_R11
-
-#define Rstack		ARM_R4
-#define Rlocals		ARM_R7
-#define Ristate		ARM_R8
-#define Rthread		ARM_R10
-
-#define Rint_jpc	ARM_R5
-
-#define IS_ARM_INT_REG(r) ((r) <= ARM_PC)
-#define IS_ARM_FP_REG(r) (!IS_ARM_INT_REG(r))
-
-#define I_REGSET	((1<<ARM_R4) | (1<<ARM_R5) | (1<<ARM_R6) | (1<<ARM_R7) | \
-			 (1<<ARM_R9) | (1<<ARM_R10) | (1<<ARM_R11))
-#define C_REGSET	(1<<ARM_R8)
-
-#define LOG2(n) binary_log2(n)
-
-unsigned binary_log2(unsigned n)
-{
-  unsigned r = 0;
-  if ((n & 0xffff) == 0) r = 16, n >>= 16;
-  if ((n & 0xff) == 0) r += 8, n >>= 8;
-  if ((n & 0xf) == 0) r += 4, n >>= 4;
-  if ((n & 3) == 0) r += 2, n >>= 2;
-  if ((n & 1) == 0) r += 1;
-  return r;
-}
-
-typedef struct Compiled_Method {
-    // All entry points aligned on a cache line boundary
-    //		.align	CODE_ALIGN
-    // slow_entry:				@ callee save interface
-    // 		push	{r4, r5, r6, r7, r9, r10, r11, lr}
-    // 		mov	Rthread, r2
-    // 		bl	fast_entry
-    // 		pop	{r4, r5, r6, r7, r9, r10, r11, pc}
-    unsigned slow_entry[4];
-    unsigned *osr_table;			// pointer to the osr table
-    unsigned *exception_table;
-    Compiled_Method *next;
-    // The next 6 halfword give the register mapping for JAZ_V1 to JAZ_v5
-    // This is used when receovering from an exception so we can push
-    // the register back into the local variables pool.
-    short regusage[6];
-    unsigned header_end[1];
-    // fast_entry:
-    // 		push	{r8, lr}
-    // 		...	@ The compiled code
-    // 		pop	{r8, pc}
-    // 		.align	WORD_ALIGN
-    // code_handle:				@ from interpreted entry
-    // 		.word	slow_entry		@ bottom bit must be set!
-    // osr_table:
-    // 		.word	<no. of entries>
-    // @@@ For bytecode 0 and for each backwards branch target
-    // 		.short	<bytecode index>
-    // 		.short	<code offset>		@ offset in halfwords from slow_entry
-} Compiled_Method;
-
-Compiled_Method *compiled_method_list = 0;
-Compiled_Method **compiled_method_list_tail_ptr = &compiled_method_list;
-
-typedef struct Thumb2_Entrypoint {
-  unsigned compiled_entrypoint;
-  short *regusage;
-} Thumb2_Entrypoint;
-
-typedef struct CodeBuf {
-    unsigned short *codebuf;
-    unsigned idx;
-    unsigned limit;
-} CodeBuf;
-
-typedef struct Thumb2_Stack {
-    unsigned *stack;
-    unsigned depth;
-} Thumb2_Stack;
-
-#define IS_SREG(r) ((r) < STACK_REGS)
-
-typedef struct Thumb2_Registers {
-    unsigned *r_local;
-    unsigned npregs;
-    unsigned pregs[PREGS];
-    int mapping[PREGS];
-} Thumb2_Registers;
-
-typedef struct Thumb2_Info {
-    JavaThread *thread;
-    methodOop method;
-    unsigned *bc_stackinfo;
-    unsigned *locals_info;
-    jubyte *code_base;
-    unsigned code_size;
-    CodeBuf *codebuf;
-    Thumb2_Stack *jstack;
-    Thumb2_Registers *jregs;
-    unsigned compiled_return;
-    unsigned compiled_word_return[12];  // R0 .. R11
-    unsigned is_leaf;
-    unsigned use_istate;
-} Thumb2_Info;
-
-#define IS_INT_SIZE_BASE_TYPE(c) (c=='B' || c=='C' || c=='F' || c=='I' || c=='S' || c=='Z')
-#define IS_INT_SIZE_TYPE(c) (IS_INT_SIZE_BASE_TYPE(c) || c == 'L' || c == '[')
-
-void Thumb2_save_local_refs(Thumb2_Info *jinfo, unsigned stackdepth);
-void Thumb2_restore_local_refs(Thumb2_Info *jinfo, unsigned stackdepth);
-void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth);
-
-static int method_stackchange(const jbyte *base)
-{
-  jbyte c;
-  int stackchange = 0;
-
-  c = *base++;
-  JASSERT(c == '(', "Invalid signature, missing '('");
-  while ((c = *base++) != ')') {
-    stackchange -= 1;
-    if (c == 'J' || c == 'D') {
-      stackchange -= 1;
-    } else if (c == '[') {
-      do { c = *base++; } while (c == '[');
-      if (c == 'L')
-	do { c = *base++; } while (c != ';');
-    } else if (c == 'L') {
-      do { c = *base++; } while (c != ';');
-    } else {
-      JASSERT(IS_INT_SIZE_BASE_TYPE(c), "Invalid signature, bad arg type");
-    }
-  }
-  JASSERT(c == ')', "Invalid signature, missing ')'");
-  c = *base++;
-  if (c == 'J' || c == 'D') stackchange += 2;
-  else if (c != 'V') {
-    stackchange += 1;
-    JASSERT(IS_INT_SIZE_TYPE(c), "Invalid signature, bad ret type");
-  }
-  return stackchange;
-}
-
-static void Thumb2_local_info_from_sig(Thumb2_Info *jinfo, methodOop method,
-				       const jbyte *base)
-{
-  jbyte c;
-  unsigned arg = 0;
-  unsigned *locals_info = jinfo->locals_info;
-  unsigned local_info;
-
-  if (!method->is_static()) locals_info[arg++] = 1 << LOCAL_REF;
-  c = *base++;
-  JASSERT(c == '(', "Invalid signature, missing '('");
-  while ((c = *base++) != ')') {
-    local_info = 1 << LOCAL_INT;
-    if (c == 'J') local_info = 1 << LOCAL_LONG;
-    else if (c == 'D') local_info = 1 << LOCAL_DOUBLE;
-    else if (c == '[') {
-      local_info = 1 << LOCAL_REF;
-      do { c = *base++; } while (c == '[');
-      if (c == 'L')
-	do { c = *base++; } while (c != ';');
-    } else if (c == 'L') {
-      local_info = 1 << LOCAL_REF;
-      do { c = *base++; } while (c != ';');
-    } else {
-      JASSERT(IS_INT_SIZE_BASE_TYPE(c), "Invalid signature, bad arg type");
-    }
-    locals_info[arg++] = local_info;
-  }
-}
-
-#define T_UNDEFINED_32	0xf7f0a000
-#define T_UNDEFINED_16	0xde00
-
-static const char *local_types[] = { "int", "long", "float", "double", "ref" };
-
-#ifdef T2_PRINT_DISASS
-
-class Hsdis {
-public:
-
-  typedef void* (*decode_instructions_event_callback_ftype)  (void*, const char*, void*);
-
-  typedef void* (*decode_instructions_ftype)
-    (void* start, void* end,
-     decode_instructions_event_callback_ftype event_callback,
-     void* event_stream,
-     void* printf_callback,
-     void* printf_stream,
-     const char* options);
-
-  decode_instructions_ftype decode_instructions;
-
-  void *lib;
-
-  // Load hsdis-arm.so lazily.
-  Hsdis()
-  {
-    decode_instructions = NULL;
-
-    if (PrintAssembly) {
-      if (lib = dlopen("hsdis-arm.so", RTLD_NOW)) {
-	decode_instructions
-	  = (typeof decode_instructions)dlsym(lib, "decode_instructions");
-      }
-
-      if (! (decode_instructions)) {
-	fprintf (stderr, "PrintAssembly (or T2_PRINT_DISASS) is set, but\n"
-		 "hsdis-arm.so has not been found or is invalid.  If you want to\n"
-		 "see a disassembly, please ensure that a valid copy of\n"
-		 "hsdis-arm.so is present somewhere in your library load path.\n");
-	abort();
-      }
-    }
-  }
-};
-
-static void *print_address(void *stream, const char *tag, void *data);
-
-void Thumb2_disass(Thumb2_Info *jinfo)
-{
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-  unsigned *locals_info = jinfo->locals_info;
-  unsigned nlocals = jinfo->method->max_locals();
-  int bci = 0;
-  int last_bci = -1;
-  int start_b, end_b;
-  unsigned nodisass;
-
-  unsigned short *codebuf = jinfo->codebuf->codebuf;
-  unsigned idx, compiled_len;
-
-  static Hsdis hsdis;
-
-  fflush(stdout);
-  fflush(stderr);
-
-  compiled_len = jinfo->codebuf->idx * 2;
-  for (idx = 0; idx < compiled_len; ) {
-    nodisass = 0;
-    start_b = start_bci[idx/2];
-    end_b = end_bci[idx/2];
-    if (start_b != -1) {
-      last_bci != -1;
-      for (bci = start_b; bci < end_b; ) {
-	unsigned stackinfo = bc_stackinfo[bci];
-	unsigned opcode;
-	int len;
-
-	if (stackinfo & BC_BRANCH_TARGET)
-	  fprintf(stderr, "----- Basic Block -----\n");
-	JASSERT(bci > last_bci, "disass not advancing");
-	last_bci = bci;
-	fprintf(stderr, "%c%4d : ", (stackinfo & BC_VISITED) ? ' ' : '?', bci);
-	opcode = code_base[bci];
-	if (opcode > OPC_LAST_JAVA_OP) {
-	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
-	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
-	}
-	len = Bytecodes::length_for((Bytecodes::Code)opcode);
-	if (len <= 0) {
-	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
-	  len = (Bytecodes::special_length_at
-		 (code,
-		  (address)(code_base+bci), (address)(code_base+code_size)));
-	}
-	switch (opcode) {
-	  case opc_tableswitch: {
-	    int nbci = (bci & ~3) + 4;
-	    int low, high;
-	    unsigned w;
-	    unsigned *table;
-	    int def;
-	    unsigned n, i;
-
-	    fprintf(stderr, "%02x ", opcode);
-	    for (int i = 1; i < 5; i++)
-	      fprintf(stderr, "   ");
-	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
-	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
-	    w = *(unsigned int *)(code_base + nbci + 4);
-	    low = (int)BYTESEX_REVERSE(w);
-	    w = *(unsigned int *)(code_base + nbci + 8);
-	    high = (int)BYTESEX_REVERSE(w);
-	    w = *(unsigned int *)(code_base + nbci + 0);
-	    def = (int)BYTESEX_REVERSE(w);
-	    table = (unsigned int *)(code_base + nbci + 12);
-	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
-	    fprintf(stderr, "\tlow:\t\t0x%08x\n", low);
-	    fprintf(stderr, "\thigh:\t\t0x%08x\n", high);
-	    n = high - low + 1;
-	    while (low <= high) {
-	      int off;
-
-	      w = *table++;
-	      off = (int)BYTESEX_REVERSE(w);
-	      fprintf(stderr, "\toffset %d:\t0x%08x\n", low, off);
-	      low++;
-	    }
-	    bci += len;
-	    {
-	      // The insn sequence generated by tableswitch is 14
-	      // bytes long.
-	      const int tableswitch_code_len = 14;
-	      fprintf(stderr, "0x%08x:\t", (int)codebuf+idx);
-		unsigned short *p = codebuf + idx/2;
-		hsdis.decode_instructions((char*)p,
-					  (char *)p + tableswitch_code_len,
-					  print_address, NULL, NULL, stdout,
-					  "force-thumb");
-		idx += tableswitch_code_len;
-	    }
-	    for (i = 0; i < n; i++) {
-	      fprintf(stderr, "0x%08x:\t.short\t0x%04x\n", (int)codebuf+idx, *(short *)((int)codebuf + idx));
-	      idx += 2;
-	    }
-	    nodisass = 1;
-	    break;
-	  }
-	  case opc_lookupswitch: {
-	    unsigned w;
-	    unsigned nbci = (bci & ~3) + 4;;
-	    int def;
-	    int npairs;	// The Java spec says signed but must be >= 0??
-	    unsigned *table;
-
-	    fprintf(stderr, "%02x ", opcode);
-	    for (int i = 1; i < 5; i++)
-	      fprintf(stderr, "   ");
-	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
-	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
-
-	    w = *(unsigned int *)(code_base + nbci + 0);
-	    def = (int)BYTESEX_REVERSE(w);
-	    w = *(unsigned int *)(code_base + nbci + 4);
-	    npairs = (int)BYTESEX_REVERSE(w);
-	    table = (unsigned int *)(code_base + nbci + 8);
-	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
-	    fprintf(stderr, "\tnpairs:\t\t0x%08x\n", npairs);
-	    for (int i = 0; i < npairs; i++) {
-	      unsigned match, off;
-	      w = table[0];
-	      match = BYTESEX_REVERSE(w);
-	      w = table[1];
-	      table += 2;
-	      off = BYTESEX_REVERSE(w);
-	      fprintf(stderr, "\t  match: 0x%08x, offset: 0x%08x\n", match, off);
-	    }
-	    break;
-	  }
-
-	  default:
-	    for (int i = 0; i < 5; i++) {
-	      if (i < len)
-		fprintf(stderr, "%02x ", code_base[bci+i]);
-	      else
-		fprintf(stderr, "   ");
-	    }
-	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)code_base[bci]));
-	    break;
-	}
-	bci += len;
-      }
-    }
-    if (!nodisass) {
-      {
-	int len;
-	unsigned s1, s2;
-
-	s1 = *(unsigned short *)((int)codebuf + idx);
-	s2 = *(unsigned short *)((int)codebuf + idx + 2);
-	if (s1 == T_UNDEFINED_16 || ((s1 << 16) + s2) == T_UNDEFINED_32) {
-	  if (s1 == T_UNDEFINED_16) {
-	    fprintf(stderr, "undefined (0xde00) - UNPATCHED BRANCH???");
-	    len = 2;
-	  } else {
-	    fprintf(stderr, "undefined (0xf7f0a000) - UNPATCHED BRANCH???");
-	    len = 4;
-	  }
-	} else {
-	  char *p = (char*)codebuf + idx;
-	  len = 2;
-	  while (len + idx < compiled_len
-		 && start_bci[(len + idx)/2] == -1)
-	    len += 2;
-	  hsdis.decode_instructions((char*)p, (char*)p + len,
-				      print_address, NULL, NULL, stderr,
-				      "force-thumb");
-	}
-	idx += len;
-      }
-    }
-  }
-  fflush(stderr);
-}
-// where
-static void *print_address(void *, const char *tag, void *data) {
-  if (strcmp(tag, "insn") == 0)
-    fprintf(stderr, "0x%08x:\t", (unsigned int)data);
-  return NULL;
-}
-#endif // T2_PRINT_DISASS
-
-#define BCI(len, pop, push, special, islocal, islocal_n, isstore, local_n, local_type) \
-	((len) | ((pop)<<3) | ((push)<<6) | (unsigned)((special) << 31) | ((islocal) << 30) | ((islocal_n) << 29) | ((isstore) << 28) | ((local_n) << 9) | ((local_type) << 11))
-
-#define BCI_LEN(x) 	((x) & 7)
-#define BCI_POP(x) 	(((x)>>3) & 7)
-#define BCI_PUSH(x) 	(((x)>>6) & 7)
-#define BCI_LOCAL_N(x)	(((x)>>9) & 3)
-#define BCI_LOCAL_TYPE(x) (((x) >> 11) & 7)
-
-#define BCI_TYPE_INT	0
-#define BCI_TYPE_LONG	1
-#define BCI_TYPE_FLOAT	2
-#define BCI_TYPE_DOUBLE	3
-#define BCI_TYPE_REF	4
-
-#define BCI_SPECIAL(x) 	((x) & 0x80000000)
-#define BCI_ISLOCAL(x)	((x) & 0x40000000)
-#define BCI_ISLOCAL_N(x) ((x) & 0x20000000)
-#define BCI_ISSTORE(x)	((x) & 0x10000000)
-
-static const unsigned bcinfo[256] = {
-	BCI(1, 0, 0, 0, 0, 0, 0, 0, 0),	// nop
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// aconst_null
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_m1
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_0
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_1
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_2
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_3
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_4
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// iconst_5
-	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// lconst_0
-	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// lconst_1
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_0
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_1
-	BCI(1, 0, 1, 0, 0, 0, 0, 0, 0),	// fconst_2
-	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// dconst_0
-	BCI(1, 0, 2, 0, 0, 0, 0, 0, 0),	// dconst_1
-	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// bipush
-	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// bipush
-	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// ldc
-	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// ldc_w
-	BCI(3, 0, 2, 0, 0, 0, 0, 0, 0),	// ldc2_w
-	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload
-	BCI(2, 0, 2, 0, 1, 0, 0, 0, BCI_TYPE_LONG),	// lload
-	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_FLOAT),	// fload
-	BCI(2, 0, 2, 0, 1, 0, 0, 0, BCI_TYPE_DOUBLE),	// dload
-	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_REF),	// aload
-	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0
-	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1
-	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2
-	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3
-	BCI(1, 0, 2, 0, 1, 1, 0, 0, BCI_TYPE_LONG),	// lload_0
-	BCI(1, 0, 2, 0, 1, 1, 0, 1, BCI_TYPE_LONG),	// lload_1
-	BCI(1, 0, 2, 0, 1, 1, 0, 2, BCI_TYPE_LONG),	// lload_2
-	BCI(1, 0, 2, 0, 1, 1, 0, 3, BCI_TYPE_LONG),	// lload_3
-	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_FLOAT),	// fload_0
-	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_FLOAT),	// fload_1
-	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_FLOAT),	// fload_2
-	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_FLOAT),	// fload_3
-	BCI(1, 0, 2, 0, 1, 1, 0, 0, BCI_TYPE_DOUBLE),	// dload_0
-	BCI(1, 0, 2, 0, 1, 1, 0, 1, BCI_TYPE_DOUBLE),	// dload_1
-	BCI(1, 0, 2, 0, 1, 1, 0, 2, BCI_TYPE_DOUBLE),	// dload_2
-	BCI(1, 0, 2, 0, 1, 1, 0, 3, BCI_TYPE_DOUBLE),	// dload_3
-	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_REF),	// aload_0
-	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_REF),	// aload_1
-	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_REF),	// aload_2
-	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_REF),	// aload_3
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iaload
-	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// laload
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// faload
-	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// daload
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// aaload
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// baload
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// caload
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// saload
-	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_INT),	// istore
-	BCI(2, 2, 0, 0, 1, 0, 1, 0, BCI_TYPE_LONG),	// lstore
-	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_FLOAT),	// fstore
-	BCI(2, 2, 0, 0, 1, 0, 1, 0, BCI_TYPE_DOUBLE),	// dstore
-	BCI(2, 1, 0, 0, 1, 0, 1, 0, BCI_TYPE_REF),	// astore
-	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_INT),	// istore_0
-	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_INT),	// istore_1
-	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_INT),	// istore_2
-	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_INT),	// istore_3
-	BCI(1, 2, 0, 0, 1, 1, 1, 0, BCI_TYPE_LONG),	// lstore_0
-	BCI(1, 2, 0, 0, 1, 1, 1, 1, BCI_TYPE_LONG),	// lstore_1
-	BCI(1, 2, 0, 0, 1, 1, 1, 2, BCI_TYPE_LONG),	// lstore_2
-	BCI(1, 2, 0, 0, 1, 1, 1, 3, BCI_TYPE_LONG),	// lstore_3
-	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_FLOAT),	// fstore_0
-	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_FLOAT),	// fstore_1
-	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_FLOAT),	// fstore_2
-	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_FLOAT),	// fstore_3
-	BCI(1, 2, 0, 0, 1, 1, 1, 0, BCI_TYPE_DOUBLE),	// dstore_0
-	BCI(1, 2, 0, 0, 1, 1, 1, 1, BCI_TYPE_DOUBLE),	// dstore_1
-	BCI(1, 2, 0, 0, 1, 1, 1, 2, BCI_TYPE_DOUBLE),	// dstore_2
-	BCI(1, 2, 0, 0, 1, 1, 1, 3, BCI_TYPE_DOUBLE),	// dstore_3
-	BCI(1, 1, 0, 0, 1, 1, 1, 0, BCI_TYPE_REF),	// astore_0
-	BCI(1, 1, 0, 0, 1, 1, 1, 1, BCI_TYPE_REF),	// astore_1
-	BCI(1, 1, 0, 0, 1, 1, 1, 2, BCI_TYPE_REF),	// astore_2
-	BCI(1, 1, 0, 0, 1, 1, 1, 3, BCI_TYPE_REF),	// astore_3
-	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// iastore
-	BCI(1, 4, 0, 0, 0, 0, 0, 0, 0),	// dastore
-	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// fastore
-	BCI(1, 4, 0, 0, 0, 0, 0, 0, 0),	// lastore
-	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// aastore
-	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// bastore
-	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// castore
-	BCI(1, 3, 0, 0, 0, 0, 0, 0, 0),	// sastore
-	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// pop
-	BCI(1, 2, 0, 0, 0, 0, 0, 0, 0),	// pop2
-	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// dup
-	BCI(1, 2, 3, 0, 0, 0, 0, 0, 0),	// dup_x1
-	BCI(1, 3, 4, 0, 0, 0, 0, 0, 0),	// dup_x2
-	BCI(1, 2, 4, 0, 0, 0, 0, 0, 0),	// dup2
-	BCI(1, 3, 5, 0, 0, 0, 0, 0, 0),	// dup2_x1
-	BCI(1, 4, 6, 0, 0, 0, 0, 0, 0),	// dup2_x2
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// swap
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ladd
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fadd
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dadd
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lsub
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fsub
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dsub
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// imul
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lmul
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fmul
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dmul
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// idiv
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ldiv
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fdiv
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// ddiv
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// irem
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lrem
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// frem
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// drem
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// ineg
-	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// lneg
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// fneg
-	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// dneg
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ishl
-	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lshl
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ishr
-	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lshr
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iushr
-	BCI(1, 3, 2, 0, 0, 0, 0, 0, 0),	// lushr
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// land
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lor
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// lxor
-	BCI(3, 0, 0, 0, 1, 0, 1, 0, BCI_TYPE_INT),	// iinc
-	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// i2l
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2f
-	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// i2d
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// l2i
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// l2f
-	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// l2d
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// f2i
-	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// f2l
-	BCI(1, 1, 2, 0, 0, 0, 0, 0, 0),	// f2d
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// d2i
-	BCI(1, 2, 2, 0, 0, 0, 0, 0, 0),	// d2l
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// d2f
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2b
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2c
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// i2s
-	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// lcmp
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fcmpl
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// fcmpg
-	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// dcmpl
-	BCI(1, 4, 1, 0, 0, 0, 0, 0, 0),	// dcmpg
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifeq
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifne
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// iflt
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifge
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifgt
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifle
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpeq
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpne
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmplt
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpge
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmpgt
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_icmple
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_acmpeq
-	BCI(3, 2, 0, 1, 0, 0, 0, 0, 0),	// if_acmpne
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// goto
-	BCI(3, 0, 1, 1, 0, 0, 0, 0, 0),	// jsr
-	BCI(2, 0, 0, 1, 0, 0, 0, 0, 0),	// ret
-	BCI(0, 1, 0, 1, 0, 0, 0, 0, 0),	// tableswitch
-	BCI(0, 1, 0, 1, 0, 0, 0, 0, 0),	// lookupswitch
-	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// ireturn
-	BCI(1, 2, 0, 1, 0, 0, 0, 0, 0),	// lreturn
-	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// freturn
-	BCI(1, 2, 0, 1, 0, 0, 0, 0, 0),	// dreturn
-	BCI(1, 1, 0, 1, 0, 0, 0, 0, 0),	// areturn
-	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// return
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// getstatic
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// putstatic
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// getfield
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// putfield
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokevirtual
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokespecial
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokestatic
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokeinterface
-	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// invokedynamic
-	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// new
-	BCI(2, 1, 1, 0, 0, 0, 0, 0, 0),	// newarray
-	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// anewarray
-	BCI(1, 1, 1, 0, 0, 0, 0, 0, 0),	// arraylength
-	BCI(1, 1, 1, 1, 0, 0, 0, 0, 0),	// athrow
-	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// checkcast
-	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// instanceof
-	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// monitorenter
-	BCI(1, 1, 0, 0, 0, 0, 0, 0, 0),	// monitorexit
-	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// wide
-	BCI(4, 0, 0, 1, 0, 0, 0, 0, 0),	// multianewarray
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifnull
-	BCI(3, 1, 0, 1, 0, 0, 0, 0, 0),	// ifnonnull
-	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// goto_w
-	BCI(5, 0, 0, 1, 0, 0, 0, 0, 0),	// jsr_w
-	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// breakpoint
-	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xcb
-	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// bgetfield
-	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// cgetfield
-	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xce
-	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xcf
-	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// igetfield
-	BCI(3, 1, 2, 0, 0, 0, 0, 0, 0),	// lgetfield
-	BCI(3, 1, 1, 0, 0, 0, 0, 0, 0),	// sgetfield
-	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// aputfield
-	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// bputfield
-	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// cputfield
-	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xd6
-	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xd7
-	BCI(3, 2, 0, 0, 0, 0, 0, 0, 0),	// iputfield
-	BCI(3, 3, 0, 0, 0, 0, 0, 0, 0),	// lputfield
-	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// unused 0xda
-	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_REF),	// iaccess_0
-	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_REF),	// iaccess_1
-	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_REF),	// iaccess_2
-	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_REF),	// iaccess_3
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokeresolved
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokespecialresolved
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokestaticresolved
-	BCI(3, 0, 0, 1, 0, 0, 0, 0, 0),	// invokevfinal
-	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iload
-	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iload_N
-	BCI(2, 0, 1, 0, 0, 0, 0, 0, 0),	// fast_aldc
-	BCI(3, 0, 1, 0, 0, 0, 0, 0, 0),	// fast_aldc_w
-	BCI(1, 0, 0, 1, 0, 0, 0, 0, 0),	// return_register_finalizer
-	BCI(1, 4, 2, 0, 0, 0, 0, 0, 0),	// dmac
-	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iconst_N
-	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iconst_N
-	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iconst_N
-	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iconst_N
-	BCI(2, 0, 1, 0, 1, 0, 0, 0, BCI_TYPE_INT),	// iload_iconst_N
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd_istore_N
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub_istore_N
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand_istore_N
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior_istore_N
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor_istore_N
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iadd_u4store
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// isub_u4store
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// iand_u4store
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ior_u4store
-	BCI(1, 2, 1, 0, 0, 0, 0, 0, 0),	// ixor_u4store
-	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iload
-	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iload
-	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iload
-	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iload
-	BCI(1, 0, 1, 0, 1, 1, 0, 0, BCI_TYPE_INT),	// iload_0_iload_N
-	BCI(1, 0, 1, 0, 1, 1, 0, 1, BCI_TYPE_INT),	// iload_1_iload_N
-	BCI(1, 0, 1, 0, 1, 1, 0, 2, BCI_TYPE_INT),	// iload_2_iload_N
-	BCI(1, 0, 1, 0, 1, 1, 0, 3, BCI_TYPE_INT),	// iload_3_iload_N
-};
-
-void Thumb2_pass1(Thumb2_Info *jinfo, unsigned stackdepth, unsigned bci)
-{
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-  unsigned *locals_info = jinfo->locals_info;
-  JDEBUG_ (
-   Symbol *name = jinfo->method->name();
-   Symbol *sig = jinfo->method->signature();
-  );
-  //constantPoolCacheOop cp = jinfo->method->constants()->cache();
-
-  bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-  while (bci < code_size) {
-    unsigned stackinfo = bc_stackinfo[bci];
-    unsigned bytecodeinfo;
-    unsigned opcode;
-
-    if (stackinfo & BC_VISITED) break;
-    JASSERT((int)stackdepth >= 0, "stackdepth < 0!!");
-    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | stackdepth | BC_VISITED;
-    opcode = code_base[bci];
-//	printf("bci = 0x%04x, opcode = 0x%02x (%s)", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode));
-    bytecodeinfo = bcinfo[opcode];
-    if (!BCI_SPECIAL(bytecodeinfo)) {
-      if (BCI_ISLOCAL(bytecodeinfo)) {
-	unsigned local = BCI_LOCAL_N(bytecodeinfo);
-	unsigned local_type = BCI_LOCAL_TYPE(bytecodeinfo) + LOCAL_INT;
-	unsigned local_modified = 0;
-	unsigned linfo;
-	unsigned read_count, write_count;
-
-	if (!BCI_ISLOCAL_N(bytecodeinfo)) local = code_base[bci+1];
-	if (BCI_ISSTORE(bytecodeinfo)) local_modified = 1U << LOCAL_MODIFIED;
-	linfo = locals_info[local];
-	read_count = LOCAL_READS(linfo);
-	write_count = LOCAL_WRITES(linfo);
-	if (local_modified)
-	  write_count = LOCAL_INC_COUNT(write_count);
-	else
-	  read_count = LOCAL_INC_COUNT(read_count);
-	
-	locals_info[local] |= (1 << local_type) | LOCAL_SET_COUNTS(read_count, write_count) | local_modified;
-	if (local_type == LOCAL_LONG || local_type == LOCAL_DOUBLE) {
-	  locals_info[local+1] |= (1 << local_type) | LOCAL_SET_COUNTS(read_count, write_count) | local_modified;
-	}
-      }
-      bci += BCI_LEN(bytecodeinfo);
-      stackdepth += BCI_PUSH(bytecodeinfo) - BCI_POP(bytecodeinfo);
-      JASSERT(stackdepth <= (unsigned)jinfo->method->max_stack(), "stack over/under flow?");
-      continue;
-    }
-
-    switch (opcode) {
-
-      case opc_goto: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	bci += off;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-      case opc_goto_w: {
-	int off = GET_JAVA_U4(code_base+bci+1);
-	bci += off;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_ifeq:
-      case opc_ifne:
-      case opc_iflt:
-      case opc_ifge:
-      case opc_ifgt:
-      case opc_ifle:
-      case opc_ifnull:
-      case opc_ifnonnull: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	stackdepth -= 1;
-        Thumb2_pass1(jinfo, stackdepth, bci + off);
-	bci += 3;
-	break;
-      }
-
-      case opc_if_icmpeq:
-      case opc_if_icmpne:
-      case opc_if_icmplt:
-      case opc_if_icmpge:
-      case opc_if_icmpgt:
-      case opc_if_icmple:
-      case opc_if_acmpeq:
-      case opc_if_acmpne: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	stackdepth -= 2;
-        Thumb2_pass1(jinfo, stackdepth, bci + off);
-	bci += 3;
-	break;
-      }
-
-      case opc_jsr: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
-	bci += 3;
-	stackdepth = 0;
-	break;
-      }
-      case opc_jsr_w: {
-	int off = GET_JAVA_U4(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
-	bci += 5;
-	break;
-      }
-
-      case opc_ireturn:
-      case opc_lreturn:
-      case opc_freturn:
-      case opc_dreturn:
-      case opc_areturn:
-      case opc_return:
-      case opc_return_register_finalizer:
-      case opc_ret:
-      case opc_athrow:
-	// The test for BC_VISITED above will break out of the loop!!!
-	break;
-
-      case opc_tableswitch: {
-	int low, high;
-	unsigned w;
-	unsigned *table;
-	unsigned nbci;
-	int def;
-
-	stackdepth -= 1;
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 8);
-	low = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 12);
-	high = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 16);
-
-	while (low <= high) {
-	  int off;
-	  w = *table++;
-	  off = (int)BYTESEX_REVERSE(w);
-	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	  Thumb2_pass1(jinfo, stackdepth, bci + off);
-	  low++;
-	}
-
-	bci += def;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_lookupswitch: {
-	unsigned w;
-	unsigned nbci;
-	int def;
-	int npairs;	// The Java spec says signed but must be >= 0??
-	unsigned *table;
-
-	stackdepth -= 1;
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 8);
-	npairs = (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 16);
-
-	for (int i = 0; i < npairs; i++) {
-	  int off;
-	  w = *table;
-	  table += 2;
-	  off = (int)BYTESEX_REVERSE(w);
-	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	  Thumb2_pass1(jinfo, stackdepth, bci + off);
-	}
-
-	bci += def;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_getstatic:
-      case opc_putstatic:
-      case opc_getfield:
-      case opc_putfield: {
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	constantPoolOop pool = jinfo->method->constants();
-	Symbol *sig = pool->signature_ref_at(index);
-	const jbyte *base = sig->base();
-	jbyte c = *base;
-	int stackchange;
-
-	opcode = code_base[bci];
-	if (opcode == opc_getfield || opcode == opc_putfield)
-	  stackdepth -= 1;
-	stackchange = 1;
-	if (c == 'J' || c == 'D') stackchange = 2;
-	if (opcode == opc_getfield || opcode == opc_getstatic)
-	  stackdepth += stackchange;
-	else
-	  stackdepth -= stackchange;
-	bci += 3;
-	break;
-      }
-
-      case opc_invokedynamic: {
-	int site_index = GET_NATIVE_U4(code_base+bci+1);
-	constantPoolOop pool = jinfo->method->constants();
-	int main_index = pool->cache()->secondary_entry_at(site_index)->main_entry_index();
-	JDEBUG_( int pool_index = pool->cache()->entry_at(main_index)->constant_pool_index(); );
-	Symbol *sig = pool->signature_ref_at(main_index);
-	const jbyte *base = sig->base();
-
-	JDEBUG_( tty->print("InvokeDynamic %d: %s: %s %d %d\n", opcode, name->as_C_string(), sig->as_C_string(), main_index, pool_index); );
-	stackdepth += method_stackchange(base);
-	opcode = code_base[bci];
-	bci += 5;
-	break;
-      }
-
-      case opc_invokeresolved:
-      case opc_invokespecialresolved:
-      case opc_invokestaticresolved:
-      case opc_invokevfinal:
-      case opc_invokeinterface:
-      case opc_invokevirtual:
-      case opc_invokespecial:
-      case opc_invokestatic: {
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	constantPoolOop pool = jinfo->method->constants();
-	Symbol *sig = pool->signature_ref_at(index);
-	const jbyte *base = sig->base();
-
-	jinfo->is_leaf = 0;
-	JDEBUG_( tty->print("%d: %s: %s\n", opcode, name->as_C_string(), sig->as_C_string()); );
-	stackdepth += method_stackchange(base);
-	opcode = code_base[bci];
-	bci += 3;
-	if (opcode == opc_invokeinterface) bci += 2;
-	if (opcode != opc_invokestatic && opcode != opc_invokestaticresolved)
-	  stackdepth -= 1;
-	JDEBUG_( tty->print("invoke %d: %s: %s %d %d %d\n", opcode, name->as_C_string(), sig->as_C_string(),
-	         bci, index, stackdepth); );
-	break;
-      }
-
-      case opc_multianewarray:
-	stackdepth = (stackdepth - code_base[bci+3]) + 1;
-	bci += 4;
-	break;
-
-      case opc_wide: {
-	opcode = code_base[bci+1];
-	if (opcode == opc_iinc) {
-	  bci += 6;
-	} else {
-	  bci += 4;
-	  if (opcode == opc_iload ||
-	  	opcode == opc_fload || opcode == opc_aload)
-	    stackdepth += 1;
-	  else if (opcode == opc_lload || opcode == opc_dload)
-	    stackdepth += 2;
-	  else if (opcode == opc_istore ||
-	  	opcode == opc_fstore || opcode == opc_astore)
-	    stackdepth -= 1;
-	  else if (opcode == opc_lstore || opcode == opc_dstore)
-	    stackdepth -= 2;
-	  else if (opcode != opc_ret)
-	    fatal(err_msg("Undefined wide opcode %d\n", opcode));
-	}
-	break;
-      }
-
-      default:
-	opcode = code_base[bci];
-	fatal(err_msg("Undefined opcode %d\n", opcode));
-	break;
-    }
-  }
-}
-
-void Thumb2_RegAlloc(Thumb2_Info *jinfo)
-{
-  unsigned *locals_info = jinfo->locals_info;
-  unsigned i, j;
-  unsigned linfo;
-  unsigned score, max_score;
-  unsigned local;
-  unsigned nlocals = jinfo->method->max_locals();
-  unsigned *pregs = jinfo->jregs->pregs;
-  unsigned npregs = jinfo->jregs->npregs;
-
-  for (i = 0; i < npregs; i++) jinfo->jregs->mapping[i] = -1;
-  for (i = 0; i < npregs; i++) {
-    if (jinfo->use_istate && pregs[i] == Ristate) continue;
-    max_score = 0;
-    for (j = 0; j < nlocals; j++) {
-      linfo = locals_info[j];
-
-      if (linfo & ((1<<LOCAL_ALLOCATED)|(1<<LOCAL_DOUBLE))) continue;
-      score = LOCAL_READS(linfo) + LOCAL_WRITES(linfo);
-      if (linfo & (1<<LOCAL_MODIFIED)) score = (score+1) >> 2;
-      if (linfo & (1<<LOCAL_REF)) score = score - (score >> 2);
-      if (linfo & (1<<LOCAL_LONG)) score = (score+1) >> 2;
-      if (score > max_score) max_score = score, local = j;
-    }
-    if (max_score < (OSPACE ? 8 : 2)) break;
-    locals_info[local] |= 1<<LOCAL_ALLOCATED;
-    jinfo->jregs->r_local[local] = pregs[i];
-    jinfo->jregs->mapping[i] = local;
-  }
-#ifdef T2_PRINT_REGUSAGE
-  if (t2_print_regusage) {
-    fprintf(stderr, "Regalloc: %d physical registers allocated as follows\n", npregs);
-    for (j = 0; j < nlocals; j++) {
-      unsigned r = jinfo->jregs->r_local[j];
-      if (r) {
-	unsigned typ = (locals_info[j] >> LOCAL_INT) & 0x1f;
-	fprintf(stderr, "  ARM Reg R%d -> local %d (type = %s)\n", r, j, local_types[LOG2(typ)]);
-      }
-    }
-  }
-#endif
-}
-
-//-------------------------------------------------------------------------------------
-
-#define Thumb2		1
-#define ThumbEE		0
-
-#define	DA	0
-#define	IA	1
-#define DB	2
-#define IB	3
-
-#define	PUSH_ED	0
-#define PUSH_EA	1
-#define	PUSH_FD	2
-#define	PUSH_FA	3
-
-#define	POP_FA	0
-#define	POP_FD	1
-#define	POP_EA	2
-#define	POP_ED	3
-
-#define ROR(imm, sh) (((imm) >> (sh)) | ((imm) << (32 - (sh))))
-#define ROL(imm, sh) (((imm) << (sh)) | ((imm) >> (32 - (sh))))
-
-#define abs(i) ((i) < 0 ? -(i) : (i))
-#define U(i) ((i) < 0 ? 0 : 1)
-
-#define LS_STR		0
-#define	LS_STRB		1
-#define	LS_STRH		2
-#define LS_LDRSB	3
-#define	LS_LDR		4
-#define LS_LDRB		5
-#define	LS_LDRH		6
-#define LS_LDRSH	7
-
-#define LS_IS_LDR(op)	((op) >= LS_LDRSB)
-#define LS_IS_WORD(op)	(((op) & 3) == LS_STR)
-#define LS_IS_BYTE(op)	(((op) & 3) == LS_STRB || (op) == LS_LDRSB)
-#define LS_IS_HW(op)	(((op) & 3) == LS_STRH || (op) == LS_LDRSH)
-
-static const unsigned t_ls_ops[16] = {
-	0x5000,		0xf8400000,
-	0x5400,		0xf8000000,
-	0x5200,		0xf8200000,
-	0x5600,		0xf9100000,
-	0x5800,		0xf8500000,
-	0x5c00,		0xf8100000,
-	0x5a00,		0xf8300000,
-	0x5e00,		0xf9300000,
-};
-
-#define DP_ADC	0
-#define DP_ADD	1
-#define DP_AND	2
-#define DP_ASR	3
-#define DP_BIC	4
-#define DP_CMN	5
-#define DP_CMP	6
-#define DP_EOR	7
-#define DP_LSL	8
-#define DP_LSR	9
-#define DP_MOV	10
-#define DP_MVN	11
-#define DP_ORN	12
-#define DP_ORR	13
-#define DP_ROR	14
-#define DP_RSB	15
-#define DP_SBC	16
-#define DP_SUB	17
-#define DP_TEQ	18
-#define DP_TST	19
-#define DP_MUL	20
-
-static const unsigned n_ops[] = {
-	DP_SBC,		// ADC	x, y == SBC x, ~y
-	DP_SUB,		// ADD	x, y == SUB x, -y
-	DP_BIC,		// AND	x, y == BIX x, ~y
-	(unsigned)-1,	// ASR
-	DP_AND,		// BIC	x, y == AND x, ~y
-	DP_CMP,		// CMN	x, y == CMP x, -y
-	DP_CMN,		// CMP	x, y == CMN x, -y
-	(unsigned)-1,	// EOR
-	(unsigned)-1,	// LSL
-	(unsigned)-1,	// LSR
-	DP_MVN,		// MOV	x, y == MVN x, ~y
-	DP_MOV,		// MVN	x, y == MOV x, ~y
-	DP_ORR,		// ORN	x, y == ORR x, ~y
-	DP_ORN,		// ORR	x, y == ORN x, ~y
-	(unsigned)-1,	// ROR
-	(unsigned)-1,	// RSB
-	DP_ADC,		// SBC	x, y == ADC x, ~y
-	DP_ADD,		// ADD	x, y == SUB x, -y
-	(unsigned)-1,	// TEQ
-	(unsigned)-1,	// TST
-	(unsigned)-1,	// MUL
-};
-
-#define N_OP(op)	n_ops[(op)]
-
-static const unsigned t_dop_ops[] = {
-//	Rd, Rm, #N	Rd, Rn, Rm
-	0xf1400000,	0xeb400000,	// ADC
-	0xf1000000,	0xeb000000,	// ADD
-	0xf0000000,	0xea000000,	// AND
-	0xea4f0020,	0xfa40f000,	// ASR
-	0xf0200000,	0xea200000,	// BIC
-	0xf1100f00,	0xeb100f00,	// CMN
-	0xf1b00f00,	0xebb00f00,	// CMP
-	0xf0800000,	0xea800000,	// EOR
-	0xea4f0000,	0xfa00f000,	// LSL
-	0xea4f0010,	0xfa20f000,	// LSR
-	0xf04f0000,	0xea4f0000,	// MOV
-	0xf06f0000,	0xea6f0000,	// MVN
-	0xf0600000,	0xea600000,	// ORN
-	0xf0400000,	0xea400000,	// ORR
-	0xea4f0030,	0xfa6f0000,	// ROR
-	0xf1c00000,	0xebc00000,	// RSB
-	0xf1600000,	0xeb600000,	// SBC
-	0xf1a00000,	0xeba00000,	// SUB
-	0xf0900f00,	0xea900f00,	// TEQ
-	0xf0100f00,	0xea100f00,	// TST
-	(unsigned)-1,	0xfb00f000,	// MUL
-};
-
-#define DP_IMM(op)	t_dop_ops[(op)*2]
-#define DP_REG(op)	t_dop_ops[(op)*2+1]
-
-#define VP_ADD	0
-#define VP_SUB	1
-#define VP_MUL	2
-#define VP_DIV	3
-#define VP_SQRT 4
-
-static const unsigned t_vop_ops[] = {
-	0xee300a00,			// VADD
-	0xee300a40,			// VSUB
-	0xee200a00,			// VMUL
-	0xee800a00,			// VDIV
-	0xeeb10bc0			// VSQRT
-};
-
-#define VP_REG(op)	t_vop_ops[op]
-
-#define T1_LS_OP(op)	t_ls_ops[(op)*2]
-#define T2_LS_OP(op)	t_ls_ops[(op)*2+1]
-
-#define SHIFT_LSL	0
-#define SHIFT_LSR	1
-#define SHIFT_ASR	2
-#define SHIFT_ROR	3
-#define SHIFT_RRX	3
-
-//------------------------------------------------------------------------------------
-
-#define TBIT 1
-
-#define E_STR_IMM6(src, imm6)		(0xce00 | ((imm6)<<3) | (src))
-#define E_LDR_IMM6(dst, imm6)		(0xcc00 | ((imm6)<<3) | (dst))
-#define E_LDR_IMM5(dst, imm5)		(0xcb00 | ((imm5)<<3) | (dst))
-#define E_LDR_IMM3(dst, base, imm3)	(0xc800 | ((imm3)<<6) | ((base) << 3) | (dst))
-
-#define T_MOV_IMM8(r, imm8)		(0x2000 | ((r)<<8) | (imm8))
-#define T_MOV_BYTELANE(r, typ, b)	(0xf04f0000 | ((typ) << 12) | ((r) << 8) | (b))
-#define T_MOV_ROT_IMM(r, ror, imm)	\
-		(0xf04f0000 | (((ror) & 0x10) << (26-4)) | (((ror) & 0xe) << (12-1)) |	\
-		(((ror) & 1) << 7) | ((r) << 8) | ((imm) & 0x7f))
-#define T_MOVW_IMM16(r, imm)		\
-		(0xf2400000 | (((imm) & 0xf000) << (16-12)) | (((imm) & 0x800) << (26-11)) | \
-		(((imm) & 0x700) << (12-8)) | ((imm) & 0xff) | ((r) << 8))
-#define T_MOVT_IMM16(r, imm)		\
-		(0xf2c00000 | (((imm) & 0xf000) << (16-12)) | (((imm) & 0x800) << (26-11)) | \
-		(((imm) & 0x700) << (12-8)) | ((imm) & 0xff) | ((r) << 8))
-#define T_MVN_BYTELANE(r, typ, b)	(0xf06f0000 | ((typ) << 12) | ((r) << 8) | (b))
-#define T_MVN_ROT_IMM(r, ror, imm)	(0xf06f0000 | (((ror) & 0x10) << (26-4)) |	\
-		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((r) << 8) | ((imm) & 0x7f))
-
-#define T_ORR_ROT_IMM(dst, src, ror, imm)	(0xf0400000 | (((ror) & 0x10) << (26-4)) | \
-		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((src) << 16) |	\
-		((dst) << 8) | ((imm) & 0x7f))
-#define T_ORN_ROT_IMM(dst, src, ror, imm)	(0xf0600000 | (((ror) & 0x10) << (26-4)) | \
-		(((ror) & 0xe) << (12-1)) | (((ror) & 1) << 7) | ((src) << 16) |	\
-		((dst) << 8) | ((imm) & 0x7f))
-
-#define T_STR_IMM5(src, base, imm5)	(0x6000 | ((imm5) << 6) | ((base) << 3) | (src))
-#define T_STR_SP_IMM8(src, imm8)	(0x9000 | ((src) << 8) | (imm8))
-#define T_STR_IMM12(src, base, imm12)	(0xf8c00000 | ((src)<<12) | ((base)<<16) | (imm12))
-#define T_STR_IMM8(src, base, imm8, pre, wb)	(0xf8400800 | ((src)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_LDR_IMM5(dst, base, imm5)	(0x6800 | ((imm5) << 6) | ((base) << 3) | (dst))
-#define T_LDR_SP_IMM8(src, imm8)	(0x9800 | ((dst) << 8) | (imm8))
-#define T_LDR_IMM12(dst, base, imm12)	(0xf8d00000 | ((dst)<<12) | ((base)<<16) | (imm12))
-#define T_LDR_IMM8(src, base, imm8, pre, wb)	(0xf8500800 | ((dst)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_STRB_IMM5(src, base, imm5)	(0x7000 | ((imm5) << 6) | ((base) << 3) | (src))
-#define T_STRB_IMM12(src, base, imm12)	(0xf8800000 | ((src)<<12) | ((base)<<16) | (imm12))
-#define T_STRB_IMM8(src, base, imm8, pre, wb)	(0xf8000800 | ((src)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_LDRB_IMM5(dst, base, imm5)	(0x7800 | ((imm5) << 6) | ((base) << 3) | (dst))
-#define T_LDRB_IMM12(dst, base, imm12)	(0xf8900000 | ((dst)<<12) | ((base)<<16) | (imm12))
-#define T_LDRB_IMM8(dst, base, imm8, pre, wb)	(0xf8100800 | ((dst)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_STRH_IMM5(dst, base, imm5)	(0x8000 | ((imm5) << 6) | ((base) << 3) | (dst))
-#define T_STRH_IMM12(dst, base, imm12)	(0xf8a00000 | ((dst)<<12) | ((base)<<16) | (imm12))
-#define T_STRH_IMM8(dst, base, imm8, pre, wb)	(0xf8200800 | ((dst)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_LDRH_IMM5(dst, base, imm5)	(0x8800 | ((imm5) << 6) | ((base) << 3) | (dst))
-#define T_LDRH_IMM12(dst, base, imm12)	(0xf8b00000 | ((dst)<<12) | ((base)<<16) | (imm12))
-#define T_LDRH_IMM8(dst, base, imm8, pre, wb)	(0xf8300800 | ((dst)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_LDRSH_IMM12(dst, base, imm12)	(0xf9b00000 | ((dst)<<12) | ((base)<<16) | (imm12))
-#define T_LDRSH_IMM8(dst, base, imm8, pre, wb)	(0xf9300800 | ((dst)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_LDRSB_IMM12(dst, base, imm12)	(0xf9900000 | ((dst)<<12) | ((base)<<16) | (imm12))
-#define T_LDRSB_IMM8(dst, base, imm8, pre, wb)	(0xf9100800 | ((dst)<<12) | 		\
-		((base)<<16) | ((pre)<<10) | (U(imm8)<<9) | ((wb)<<8) | abs(imm8))
-
-#define T_LDRD_IMM(lo, hi, base, imm8, pre, wb)	(0xe8500000 | ((base)<<16) |		\
-		((lo) << 12) | ((hi)<<8) | ((pre)<<24) | (U(imm8)<<23) | ((wb)<<21) | abs(imm8))
-#define T_STRD_IMM(lo, hi, base, imm8, pre, wb)	(0xe8400000 | ((base)<<16) |		\
-		((lo) << 12) | ((hi)<<8) | ((pre)<<24) | (U(imm8)<<23) | ((wb)<<21) | abs(imm8))
-
-#define T_LDREX(dst, base, off) (0xe8500f00 | ((base) << 16) | ((dst) << 12) | ((off) >> 2))
-#define T_STREX(dst, src, base, off) (0xe8400000 | ((base) << 16) | \
-		((src) << 12) | ((dst) << 8) | ((off >> 2)))
-
-#define T_LDREXD(dst1, dst2, base) (0xe8d0007f | ((base) << 16) | ((dst1) << 12) | (dst2 << 8))
-#define T_STREXD(dst, src1, src2, base) (0xe8c00070 | ((base) << 16) | ((src1) << 12) | (src2 << 8) | dst)
-
-#define T_STM8(base, regset)		(0xc000 | ((base) << 8) | (regset))
-#define T_STM16(base, regset, st, wb)	(0xe8000000 | ((st) << 23) | ((wb) << 21) |	\
-		((base) << 16) | (regset))
-
-#define T_LDM8(base, regset)		(0xc800 | ((base) << 8) | (regset))
-#define	T_LDM16(base, regset, st, wb)	(0xe8100000 | ((st) << 23) | ((wb) << 21) |	\
-		((base) << 16) | (regset))
-#define T_POP(regset)	(0xbc00 | (((regset & (1<<ARM_PC)) >> ARM_PC) << 8) | (regset & 0xff))
-#define T_PUSH(regset)	(0xb400 | (((regset & (1<<ARM_LR)) >> ARM_LR) << 8) | (regset & 0xff))
-
-#define	T1_LDR_STR_REG(op, xfer, base, off) 	((op) | ((off) << 6) | ((base) << 3) | (xfer))
-#define T2_LDR_STR_REG(op, xfer, base, off, sh)	((op) | ((base) << 16) | ((xfer) << 12) | \
-		((sh)<<4) | (off))
-
-#define T_CHKA(size, idx)		(0xca00 | (((size) & 8) << (7-3)) | ((idx) << 3) | ((size) & 7))
-#define T_HBL(handler)			(0xc300 | (handler))
-#define T_MISC_CONTROL(op, option)	(0xf3bf8f00 | ((op)<<4) | option)
-#define T_ENTER_LEAVE(enter)		(T_MISC_CONTROL(enter, 0xf))
-#define T_DMB(option)			(T_MISC_CONTROL(5, option))
-
-#define T1_ADD_IMM(dst, src, imm3)	(0x1c00 | ((imm3) << 6) | ((src) << 3) | (dst))
-#define T2_ADD_IMM(r, imm8)		(0x3000 | ((r) << 8) | (imm8))
-#define T3_ADD_BYTELANE(dst, src, typ, b) (0xf1000000 | ((src) << 16) | ((typ) << 12) | \
-		((dst) << 8) | (b))
-#define T3_ADD_ROT_IMM(dst, src, ror, imm) (0xf1000000 | ((src) << 16) | ((dst) << 8) | \
-		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
-		((imm) & 0x7f))
-#define T4_ADD_IMM(dst, src, imm)	(0xf2000000 | ((src) << 16) | ((dst) << 8) | \
-		(((imm) & 0x800) << (26-11)) | (((imm) & 0x700) << (12-8)) | ((imm) & 0xff))
-
-#define T1_SUB_IMM(dst, src, imm3)	(0x1e00 | ((imm3) << 6) | ((src) << 3) | (dst))
-#define T2_SUB_IMM(r, imm8)		(0x3800 | ((r) << 8) | (imm8))
-#define T3_SUB_BYTELANE(dst, src, typ, b) (0xf1a00000 | ((src) << 16) | ((typ) << 12) | \
-		((dst) << 8) | (b))
-#define T3_SUB_ROT_IMM(dst, src, ror, imm) (0xf1a00000 | ((src) << 16) | ((dst) << 8) | \
-		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
-		((imm) & 0x7f))
-#define T4_SUB_IMM(dst, src, imm)	(0xf2a00000 | ((src) << 16) | ((dst) << 8) | \
-		(((imm) & 0x800) << (26-11)) | (((imm) & 0x700) << (12-8)) | ((imm) & 0xff))
-
-#define T_DOP_BYTELANE(op, dst, src, typ, b)	((op) | ((dst) << 8) | ((src) << 16) | \
-		((typ) << 12) | (b))
-#define T_DOP_ROT_IMM(op, dst, src, ror, imm)	((op) | ((dst) << 8) | ((src) << 16) | \
-		(((ror) & 0x10) << (26-4)) | (((ror) & 0x0e) << (12-1)) | (((ror) & 1) << 7) | \
-		((imm) & 0x7f))
-#define T_SHIFT_IMM(op, dst, src, imm)	((op) | ((dst) << 8) | (src) | \
-		(((imm) & 3) << 6) | (((imm) & 0x1c) << (12-2)))
-#define T_DOP_REG(op, dst, lho, rho, st, sh)	((op) | ((dst) << 8) | ((lho) << 16) | (rho) | \
-		((st) << 4) | (((sh) & 0x1c) << (12-2)) | (((sh) & 3) << 6))
-#define T3_ADD_BYTELANE(dst, src, typ, b) (0xf1000000 | ((src) << 16) | ((typ) << 12) | \
-		((dst) << 8) | (b))
-
-#define T_CMP_IMM(src, imm)		(0x2800 | ((src) << 8) | (imm))
-#define T_CMP_REG(lho, rho)		(0x4280 | ((rho) << 3) | (lho))
-
-#define T_NEG(dst, src)		(0x4240 | (dst) | ((src) << 3))
-#define T_MVN(dst, src)		(0x43c0 | (dst) | ((src) << 3))
-#define T_MOV(dst, src)		(0x4600 | (((dst) & 8) << (7-3)) | ((src) << 3) | ((dst) & 7))
-
-#define T_VMOVS_TOARM(dst, src)	\
-	(0xee100a10 | ((dst) << 12) | (((src) & 1) << 7) | (((src) & 0x1e)<<(16-1)))
-#define T_VMOVS_TOVFP(dst, src) \
-	(0xee000a10 | ((src) << 12) | (((dst) & 1) << 7) | (((dst) & 0x1e)<<(16-1)))
-
-#define T_VMOVD_TOARM(dst_lo, dst_hi, src) \
-  (0xec500b10 | ((dst_lo) << 12) | ((dst_hi) << 16) | (((src) & 0x10)<<(5-4)) | ((src) & 0x0f))
-#define T_VMOVD_TOVFP(dst, src_lo, src_hi) \
-  (0xec400b10 | ((src_lo) << 12) | ((src_hi) << 16) | (((dst) & 0x10)<<(5-4)) | ((dst) & 0x0f))
-
-// VFP reg to VFP re move.
-#define T_VMOVD_VFP_TOVFP(dst, src) (0xeeb00b40 | (((dst) & 0x0f) << 12) | ((src) & 0x0f))
-
-#define T_VOP_REG_S(op, dst, lho, rho)	((op) |				\
-		(((dst) & 1) << 22) | (((dst) & 0x1e) << (12-1)) | 	\
-		(((lho) & 1) << 7) | (((lho) & 0x1e) << (16-1))	 |	\
-		(((rho) & 1) << 5) | (((rho) & 0x1e) >> 1))
-#define T_VOP_REG_D(op, dst, lho, rho)	((op) |	(1 << 8) |		\
-		(((dst) & 0x10) << (22-4)) | (((dst) & 0xf) << 12) | 	\
-		(((lho) & 0x10) << (7-4)) | (((lho) & 0xf) << 16)   |	\
-		(((rho) & 0x10) << (5-4)) | ((rho) & 0xf))
-
-#define T_VCMP_S(lho, rho, e)		(0xeeb40a40 | ((e) << 7) |	\
-		(((lho) & 1) << 22) | (((lho) & 0x1e) << (12-1)) |	\
-		(((rho) & 1) << 5) | (((rho) & 0x1e) >>1))
-#define T_VCMP_D(lho, rho, e)		(0xeeb40b40 | ((e) << 7) |	\
-		(((lho) & 0x10) << (22-4)) | (((lho) & 0x0f) << 12) |	\
-		(((rho) & 0x10) << (5-4)) | ((rho) & 0x0f))
-#define T_VMRS(dst)	(0xeef10a10 | ((dst) << 12))
-
-#define T_MLA(res, lho, rho, a) \
-		(0xfb000000 | ((res) << 8) | ((lho) << 16) | (rho) | ((a) << 12))
-#define T_UMULL(res_lo, res_hi, lho, rho) \
-		(0xfba00000 | ((res_lo) << 12) | ((res_hi) << 8) | ((lho) << 16) | (rho))
-
-#define T_BX(src)		(0x4700 | ((src) << 3))
-#define T_TBH(base, idx)	(0xe8d0f010 | ((base) << 16) | (idx))
-
-#define T_SXTB(dst, src)	(0xb240 | ((src) << 3) | (dst))
-#define T_SXTH(dst, src)	(0xb200 | ((src) << 3) | (dst))
-#define T2_SXTB(dst, src)	(0xfa4ff080 | ((dst) << 8) | (src))
-#define T2_SXTH(dst, src)	(0xfa0ff080 | ((dst) << 8) | (src))
-#define T_UXTH(dst, src)	(0xb280 | ((src) << 3) | (dst))
-#define T2_UXTH(dst, src)	(0xfa1ff080 | ((dst) << 8) | (src))
-
-int out_16(CodeBuf *codebuf, u32 s)
-{
-  if (codebuf->idx >= codebuf->limit)
-	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
-  codebuf->codebuf[codebuf->idx++] = s;
-  return 0;
-}
-
-int out_16x2(CodeBuf *codebuf, u32 sx2)
-{
-  unsigned s1 = sx2 >> 16;
-  unsigned s2 = sx2 & 0xffff;
-
-  out_16(codebuf, s1);
-  return out_16(codebuf, s2);
-}
-
-int out_32(CodeBuf *codebuf, u32 w)
-{
-  if (codebuf->idx + 2 > codebuf->limit)
-	longjmp(compiler_error_env, COMPILER_RESULT_FATAL);
-  *(u32 *)&(codebuf->codebuf[codebuf->idx]) = w;
-  codebuf->idx += 2;
-  return 0;
-}
-
-u32 out_pos(CodeBuf *codebuf)
-{
-  return (u32)&(codebuf->codebuf[codebuf->idx]);
-}
-
-u32 out_loc(CodeBuf *codebuf)
-{
-  return codebuf->idx * 2;
-}
-
-u32 out_align(CodeBuf *codebuf, unsigned align)
-{
-  while ((out_pos(codebuf) & (align-1)) != 0) out_16(codebuf, 0);
-  return out_pos(codebuf);
-}
-
-u32 out_align_offset(CodeBuf *codebuf, unsigned align, unsigned offset)
-{
-  while ((out_pos(codebuf) & (align-1)) != offset) out_16(codebuf, 0);
-  return out_pos(codebuf);
-}
-
-int thumb_single_shift(unsigned imm)
-{
-  unsigned lsl;
-
-  if (!imm) return -1;
-  lsl = 0;
-  while (!(imm & 0x80000000)) {
-    imm <<= 1;
-    lsl++;
-  }
-  if (lsl >= 24) return -1;
-  if ((imm & 0xff000000) == imm) return lsl+8;
-  return -1;
-}
-
-int thumb_bytelane(u32 imm)
-{
-    unsigned b1 = imm & 0xff;
-    unsigned b2 = (imm >> 8) & 0xff;
-    unsigned b3 = (imm >> 16) & 0xff;
-    unsigned b4 = imm >> 24;
-    int mov_type = -1;
-
-    if (b1 == b3 && b2 == 0 && b4 == 0) mov_type = 1;
-    if (b1 == b2 && b1 == b3 && b1 == b4) mov_type = 3;
-    if (b2 == b4 && b1 == 0 && b3 == 0) mov_type = 2;
-    if (imm < 256) mov_type = 0;
-    return mov_type;
-}
-
-int mov_imm(CodeBuf *codebuf, Reg r, u32 imm)
-{
-  int mov_type, rol;
-
-  if (Thumb2) {
-    if (r < ARM_R8 && imm < 256)
-      return out_16(codebuf, T_MOV_IMM8(r, imm));
-    mov_type = thumb_bytelane(imm);
-    if (mov_type >= 0) {
-      if (mov_type == 2) imm >>= 8;
-      return out_16x2(codebuf, T_MOV_BYTELANE(r, mov_type, (imm & 0xff)));
-    }
-    mov_type = thumb_bytelane(~imm);
-    if (mov_type >= 0) {
-      imm = ~imm;
-      if (mov_type == 2) imm >>= 8;
-      return out_16x2(codebuf, T_MVN_BYTELANE(r, mov_type, (imm & 0xff)));
-    }
-    rol = thumb_single_shift(imm);
-    if (rol >= 0)
-      return out_16x2(codebuf, T_MOV_ROT_IMM(r, rol, ROL(imm, rol)));
-    rol = thumb_single_shift(~imm);
-    if (rol >= 0)
-      return out_16x2(codebuf, T_MVN_ROT_IMM(r, rol, ROL(~imm, rol)));
-    if ((imm & ~0xffff) == 0)
-      return out_16x2(codebuf, T_MOVW_IMM16(r, imm & 0xffff));
-    if (r < ARM_R8) {
-      rol = thumb_single_shift(imm & ~0xff);
-      if (rol >= 0) {
-	out_16(codebuf, T_MOV_IMM8(r, imm & 0xff));
-	return out_16x2(codebuf, T_ORR_ROT_IMM(r, r, rol, ROL(imm & ~0xff, rol)));
-      }
-    }
-    out_16x2(codebuf, T_MOVW_IMM16(r, imm & 0xffff));
-    return out_16x2(codebuf, T_MOVT_IMM16(r, imm >> 16));
-  }
-  J_Unimplemented();
-}
-
-int load_store_reg_no_wb(CodeBuf *codebuf, u32 op, Reg xfer, Reg base, Reg offset,
-							  u32 shift, int pre)
-{
-  if (pre) {
-    if (xfer < ARM_R8 && base < ARM_R8 && offset < ARM_R8) {
-      if (ThumbEE) {
-	if ((shift == 0 && LS_IS_BYTE(op)) || (shift == 1 && LS_IS_HW(op)) ||
-							(shift == 2 && LS_IS_WORD(op)))
-	  return out_16(codebuf, T1_LDR_STR_REG(T1_LS_OP(op), xfer, base, offset));
-      } else if (shift == 0)
-	return out_16(codebuf, T1_LDR_STR_REG(T1_LS_OP(op), xfer, base, offset));
-    }
-    if (shift < 4)
-      return out_16x2(codebuf, T2_LDR_STR_REG(T2_LS_OP(op), xfer, base, offset, shift));
-  }
-  J_Unimplemented();
-}
-
-static int add_reg(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho);
-
-int load_store_reg(CodeBuf *codebuf, u32 op, Reg xfer, Reg base, Reg offset,
-							  u32 shift, int pre, int wb)
-{
-  int rc = load_store_reg_no_wb(codebuf, op, xfer, base, offset, shift, pre);
-  if (wb) {
-    return add_reg(codebuf, base, base, offset);
-  }
-  return rc;
-}
-
// Convenience wrappers around load_store_reg for each transfer kind.
// All share the signature (codebuf, reg, base, offset, shift, pre, wb).

// Store word.
int str_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_STR, src, base, offset, shift, pre, wb);
}

// Load word.
int ldr_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_LDR, dst, base, offset, shift, pre, wb);
}

// Store byte.
int strb_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_STRB, src, base, offset, shift, pre, wb);
}

// Load byte (zero-extended).
int ldrb_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_LDRB, dst, base, offset, shift, pre, wb);
}

// Store halfword.
int strh_reg(CodeBuf *codebuf, Reg src, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_STRH, src, base, offset, shift, pre, wb);
}

// Load halfword (zero-extended).
int ldrh_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_LDRH, dst, base, offset, shift, pre, wb);
}

// Load halfword (sign-extended).
int ldrsh_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_LDRSH, dst, base, offset, shift, pre, wb);
}

// Load byte (sign-extended).
int ldrsb_reg(CodeBuf *codebuf, Reg dst, Reg base, Reg offset, u32 shift, int pre, int wb)
{
  return load_store_reg(codebuf, LS_LDRSB, dst, base, offset, shift, pre, wb);
}
-
// Exclusive load: dst = [base, #offset].  The Thumb-2 LDREX encoding takes
// a word-aligned offset scaled by 4 in an 8-bit field, hence the limits.
int ldrex_imm(CodeBuf *codebuf, Reg dst, Reg base, unsigned offset)
{
  if (Thumb2) {
    if ((offset & 3) == 0 && offset < 256 * 4) {
      return out_16x2(codebuf, T_LDREX(dst, base, offset));
    }
  }
  J_Unimplemented();
}

// Exclusive store: [base, #offset] = src, with success flag written to dst.
int strex_imm(CodeBuf *codebuf, Reg dst, Reg src, Reg base, unsigned offset)
{
  if (Thumb2) {
    if ((offset & 3) == 0 && offset < 256 * 4) {
      return out_16x2(codebuf, T_STREX(dst, src, base, offset));
    }
  }
  J_Unimplemented();
}

// Exclusive doubleword load: dst0/dst1 = [base].
int ldrexd(CodeBuf *codebuf, Reg dst0, Reg dst1, Reg base)
{
  if (Thumb2) {
    return out_16x2(codebuf, T_LDREXD(dst0, dst1, base));
  }
  J_Unimplemented();
}

// Exclusive doubleword store: [base] = src0/src1, success flag in dst.
int strexd(CodeBuf *codebuf, Reg dst, Reg src0, Reg src1, Reg base)
{
  if (Thumb2) {
    return out_16x2(codebuf, T_STREXD(dst, src0, src1, base));
  }
  J_Unimplemented();
}
-
// Store word with immediate offset: [base, #offset] = src, with optional
// pre/post-indexing (pre) and base writeback (wb).  Tries the shortest
// encoding first (16-bit Thumb-1, SP-relative, ThumbEE R9-relative,
// 32-bit imm12/imm8), then falls back to loading the offset into IP and
// using the register-offset form.
int str_imm(CodeBuf *codebuf, Reg src, Reg base, int offset, int pre, int wb)
{
  unsigned uoff;

  // No indexing requested at all: treat as plain [base].
  if (!pre && !wb) pre = 1, offset = 0;
  uoff = (unsigned)offset;
  if (Thumb2) {
    if (pre && !wb && offset >= 0) {
      if (base < ARM_R8 && src < ARM_R8 && uoff < 128 && (uoff & 3) == 0)
	return out_16(codebuf, T_STR_IMM5(src, base, uoff>>2));
      if (base == ARM_SP && src < ARM_R8 && uoff < 1024 && (uoff &3) ==0)
	return out_16(codebuf, T_STR_SP_IMM8(src, uoff>>2));
      if (ThumbEE && base == ARM_R9 && src < ARM_R8 && uoff < 256 && (uoff & 3) == 0)
	return out_16(codebuf, E_STR_IMM6(src, uoff>>2));
      if (uoff < (1 << 12))
	return out_16x2(codebuf, T_STR_IMM12(src, base, uoff));
    } else if (offset < 256 && offset > -256)
	return out_16x2(codebuf, T_STR_IMM8(src, base, offset, pre, wb));
    // Fallback clobbers IP, so IP must not be an operand.
    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in str_imm");
    mov_imm(codebuf, ARM_IP, offset);
    return str_reg(codebuf, src, base, ARM_IP, 0, pre, wb);
  }
  J_Unimplemented();
}
-
-int ldr_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset, int pre, int wb)
-{
-  unsigned uoff;
-
-  if (!pre && !wb) pre = 1, offset = 0;
-  uoff = (unsigned)offset;
-  if (Thumb2) {
-    if (pre && !wb && offset >= 0) {
-      if (base < ARM_R8 && dst < ARM_R8 && uoff < 128 && (uoff & 3) ==0)
-	return out_16(codebuf, T_LDR_IMM5(dst, base, uoff>>2));
-      if (base == ARM_SP && dst < ARM_R8 && uoff < 1024 & (uoff & 3) == 0)
-	return out_16(codebuf, T_LDR_SP_IMM8(dst, uoff>>2));
-      if (ThumbEE && base == ARM_R9 && dst < ARM_R8 && uoff < 256 && (uoff & 3) == 0)
-	return out_16(codebuf, E_LDR_IMM6(dst, uoff>>2));
-      if (ThumbEE && base == ARM_R10 && dst < ARM_R8 && uoff < 128 && (uoff & 3) == 0)
-	return out_16(codebuf, E_LDR_IMM5(dst, uoff>>2));
-      if (uoff < (1 << 12))
-	return out_16x2(codebuf, T_LDR_IMM12(dst, base, uoff));
-    } else {
-      if (ThumbEE && pre && !wb && offset <= 0 && offset > -32 && (uoff & 3) == 0 &&
-							base < ARM_R8 && dst < ARM_R8)
-	return out_16(codebuf, E_LDR_IMM3(dst, base, -offset >> 2));
-      if (offset < 256 && offset > -256)
-	return out_16x2(codebuf, T_LDR_IMM8(dst, base, offset, pre, wb));
-    }
-    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
-    mov_imm(codebuf, ARM_IP, offset);
-    return ldr_reg(codebuf, dst, base, ARM_IP, 0, pre, wb);
-  }
-  J_Unimplemented();
-}
-
// The following six functions mirror str_imm/ldr_imm for byte and halfword
// transfers.  Each tries the shortest encoding, then falls back to
// materialising the offset in IP and using the register-offset form.

// Store byte: [base, #offset] = src (low 8 bits).
int strb_imm(CodeBuf *codebuf, Reg src, Reg base, int offset, int pre, int wb)
{
  unsigned uoff;

  if (!pre && !wb) pre = 1, offset = 0;
  uoff = (unsigned)offset;
  if (Thumb2) {
    if (pre && !wb && offset >= 0) {
      if (base < ARM_R8 && src < ARM_R8 && uoff < 32)
	return out_16(codebuf, T_STRB_IMM5(src, base, uoff));
      if (uoff < (1 << 12))
	return out_16x2(codebuf, T_STRB_IMM12(src, base, uoff));
    } else if (offset < 256 && offset > -256)
	return out_16x2(codebuf, T_STRB_IMM8(src, base, offset, pre, wb));
    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in str_imm");
    mov_imm(codebuf, ARM_IP, offset);
    return strb_reg(codebuf, src, base, ARM_IP, 0, pre, wb);
  }
  J_Unimplemented();
}

// Load byte (zero-extended): dst = [base, #offset].
int ldrb_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset, int pre, int wb)
{
  unsigned uoff;

  if (!pre && !wb) pre = 1, offset = 0;
  uoff = (unsigned)offset;
  if (Thumb2) {
    if (pre && !wb && offset >= 0) {
      if (base < ARM_R8 && dst < ARM_R8 && uoff < 32)
	return out_16(codebuf, T_LDRB_IMM5(dst, base, uoff));
      if (uoff < (1 << 12))
	return out_16x2(codebuf, T_LDRB_IMM12(dst, base, uoff));
    } else if (offset < 256 && offset > -256)
	return out_16x2(codebuf, T_LDRB_IMM8(dst, base, offset, pre, wb));
    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
    mov_imm(codebuf, ARM_IP, offset);
    return ldrb_reg(codebuf, dst, base, ARM_IP, 0, pre, wb);
  }
  J_Unimplemented();
}

// Store halfword: [base, #offset] = src (low 16 bits).  The 16-bit
// encoding's offset field is scaled by 2, hence the alignment check.
int strh_imm(CodeBuf *codebuf, Reg src, Reg base, int offset, int pre, int wb)
{
  unsigned uoff;

  if (!pre && !wb) pre = 1, offset = 0;
  uoff = (unsigned)offset;
  if (Thumb2) {
    if (pre && !wb && offset >= 0) {
      if (base < ARM_R8 && src < ARM_R8 && uoff < 64 && (uoff & 1) == 0)
	return out_16(codebuf, T_STRH_IMM5(src, base, uoff>>1));
      if (uoff < (1 << 12))
	return out_16x2(codebuf, T_STRH_IMM12(src, base, uoff));
    } else if (offset < 256 && offset > -256)
	return out_16x2(codebuf, T_STRH_IMM8(src, base, offset, pre, wb));
    JASSERT(base != ARM_IP && src != ARM_IP, "src or base == IP in str_imm");
    mov_imm(codebuf, ARM_IP, offset);
    return strh_reg(codebuf, src, base, ARM_IP, 0, pre, wb);
  }
  J_Unimplemented();
}

// Load halfword (zero-extended): dst = [base, #offset].
int ldrh_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset, int pre, int wb)
{
  unsigned uoff;

  if (!pre && !wb) pre = 1, offset = 0;
  uoff = (unsigned)offset;
  if (Thumb2) {
    if (pre && !wb && offset >= 0) {
      if (base < ARM_R8 && dst < ARM_R8 && uoff < 64 && (uoff & 1) == 0)
	return out_16(codebuf, T_LDRH_IMM5(dst, base, uoff>>1));
      if (uoff < (1 << 12))
	return out_16x2(codebuf, T_LDRH_IMM12(dst, base, uoff));
    } else if (offset < 256 && offset > -256)
	return out_16x2(codebuf, T_LDRH_IMM8(dst, base, offset, pre, wb));
    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
    mov_imm(codebuf, ARM_IP, offset);
    return ldrh_reg(codebuf, dst, base, ARM_IP, 0, pre, wb);
  }
  J_Unimplemented();
}

// Load halfword (sign-extended).  No 16-bit immediate form exists, so
// this goes straight to the 32-bit encodings.
int ldrsh_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset, int pre, int wb)
{
  unsigned uoff;

  if (!pre && !wb) pre = 1, offset = 0;
  uoff = (unsigned)offset;
  if (Thumb2) {
    if (pre && !wb && offset >= 0) {
      if (uoff < (1 << 12))
	return out_16x2(codebuf, T_LDRSH_IMM12(dst, base, uoff));
    } else if (offset < 256 && offset > -256)
	return out_16x2(codebuf, T_LDRSH_IMM8(dst, base, offset, pre, wb));
    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
    mov_imm(codebuf, ARM_IP, offset);
    return ldrsh_reg(codebuf, dst, base, ARM_IP, 0, pre, wb);
  }
  J_Unimplemented();
}

// Load byte (sign-extended).  As ldrsh_imm, 32-bit encodings only.
int ldrsb_imm(CodeBuf *codebuf, Reg dst, Reg base, int offset, int pre, int wb)
{
  unsigned uoff;

  if (!pre && !wb) pre = 1, offset = 0;
  uoff = (unsigned)offset;
  if (Thumb2) {
    if (pre && !wb && offset >= 0) {
      if (uoff < (1 << 12))
	return out_16x2(codebuf, T_LDRSB_IMM12(dst, base, uoff));
    } else if (offset < 256 && offset > -256)
	return out_16x2(codebuf, T_LDRSB_IMM8(dst, base, offset, pre, wb));
    JASSERT(base != ARM_IP, "base == IP in ldr_imm");
    mov_imm(codebuf, ARM_IP, offset);
    return ldrsb_reg(codebuf, dst, base, ARM_IP, 0, pre, wb);
  }
  J_Unimplemented();
}
-
-int add_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm);
-
-int ldrd_imm(CodeBuf *codebuf, Reg dst_lo, Reg dst_hi, Reg base, int offset, int pre, int wb)
-{
-  unsigned uoff;
-
-  if (!pre && !wb) pre = 1, offset = 0;
-  uoff = (unsigned)offset;
-  if (Thumb2) {
-    if (offset < 256 * 4 && offset > -256 * 4 && (offset & 3) == 0)
-      return out_16x2(codebuf, T_LDRD_IMM(dst_lo, dst_hi, base, offset>>2, pre, wb));
-    if (pre && !wb) {
-      add_imm(codebuf, ARM_IP, base, offset);
-      return out_16x2(codebuf, T_LDRD_IMM(dst_lo, dst_hi, ARM_IP, 0, 1, 0));
-    }
-  }
-  J_Unimplemented();
-}
-
-int strd_imm(CodeBuf *codebuf, Reg src_lo, Reg src_hi, Reg base, int offset, int pre, int wb)
-{
-  unsigned uoff;
-
-  if (!pre && !wb) pre = 1, offset = 0;
-  uoff = (unsigned)offset;
-  if (Thumb2) {
-    if (offset < 256 * 4 && offset > -256 * 4 && (offset & 3) == 0)
-      return out_16x2(codebuf, T_STRD_IMM(src_lo, src_hi, base, offset>>2, pre, wb));
-    if (pre && !wb) {
-      add_imm(codebuf, ARM_IP, base, offset);
-      return out_16x2(codebuf, T_STRD_IMM(src_lo, src_hi, ARM_IP, 0, 1, 0));
-    }
-  }
-  J_Unimplemented();
-}
-
-int stm(CodeBuf *codebuf, u32 regset, u32 base, u32 st, u32 wb)
-{
-  JASSERT(regset != 0, "regset != 0 in stm");
-  if (Thumb2) {
-    if (!ThumbEE && base < ARM_R8 && (regset & ~0xff) == 0 && st == IA && wb)
-      return out_16(codebuf, T_STM8(base, regset));
-    if (base == ARM_SP) {
-      if ((regset & ~0x40ff) == 0 && st == DB && wb)
-	return out_16(codebuf, T_PUSH(regset));
-    }
-    if ((regset & -regset) == regset)
-      return str_imm(codebuf, LOG2(regset), base, (st & 1) ? 4 : -4, (st & 2) >> 1, wb);
-    if (st == PUSH_EA || st == PUSH_FD)
-      return out_16x2(codebuf, T_STM16(base, regset, st, wb));
-    return out_16x2(codebuf, T_STM16(base, regset, st, wb));
-  }
-  J_Unimplemented();
-}
-
// Load multiple registers in `regset` from `base` (mirror of stm).
// NOTE(review): unlike stm, unsupported addressing modes here fall through
// to J_Unimplemented() — confirm whether the asymmetry is intentional.
// The assert message says "stm" (copy-paste from stm).
int ldm(CodeBuf *codebuf, u32 regset, u32 base, u32 st, u32 wb)
{
  JASSERT(regset != 0, "regset != 0 in stm");
  if (Thumb2) {
    if (!ThumbEE && base < ARM_R8 && (regset & ~0xff) == 0 && st == IA && wb)
      return out_16(codebuf, T_LDM8(base, regset));
    if (base == ARM_SP) {
      // POP accepts R0-R7 plus PC (bit 15).
      if ((regset & ~0x80ff) == 0 && st == IA && wb)
	return out_16(codebuf, T_POP(regset));
    }
    // Exactly one register set: a plain word load is shorter.
    if ((regset & -regset) == regset)
      return ldr_imm(codebuf, LOG2(regset), base, (st & 1) ? 4 : -4, (st & 2) >> 1, wb);
    if (st == POP_EA || st == POP_FD)
      return out_16x2(codebuf, T_LDM16(base, regset, st, wb));
  }
  J_Unimplemented();
}
-
-int dop_reg(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho, u32 sh_typ, u32 shift)
-{
-  unsigned s = 0;
-  if (op != DP_MUL) s = 1 << 20;
-//  JASSERT(dst != ARM_PC, "Terrible things happen if dst == PC && S bit set");
-  return out_16x2(codebuf, T_DOP_REG(DP_REG(op)|s, dst, lho, rho, sh_typ, shift));
-}
-
-int dop_reg_preserve(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho, u32 sh_typ, u32 shift)
-{
-  return out_16x2(codebuf, T_DOP_REG(DP_REG(op), dst, lho, rho, sh_typ, shift));
-}
-
// Sign-extend byte: dst = (s8)src.  16-bit form when both regs < R8.
int sxtb(CodeBuf *codebuf, u32 dst, u32 src)
{
  if (dst < ARM_R8 && src < ARM_R8)
    return out_16(codebuf, T_SXTB(dst, src));
  return out_16x2(codebuf, T2_SXTB(dst, src));
}

// Sign-extend halfword: dst = (s16)src.
int sxth(CodeBuf *codebuf, u32 dst, u32 src)
{
  if (dst < ARM_R8 && src < ARM_R8)
    return out_16(codebuf, T_SXTH(dst, src));
  return out_16x2(codebuf, T2_SXTH(dst, src));
}

// Zero-extend halfword: dst = (u16)src.
int uxth(CodeBuf *codebuf, u32 dst, u32 src)
{
  if (dst < ARM_R8 && src < ARM_R8)
    return out_16(codebuf, T_UXTH(dst, src));
  return out_16x2(codebuf, T2_UXTH(dst, src));
}

// Register move.  No-op if dst == src; a move to PC is emitted as BX so
// the instruction-set state is handled correctly.
int mov_reg(CodeBuf *codebuf, u32 dst, u32 src)
{
  if (dst == src) return 0;
  if (dst == ARM_PC) return out_16(codebuf, T_BX(src));
  return out_16(codebuf, T_MOV(dst, src));
//  return dop_reg(codebuf, DP_MOV, dst, 0, src, SHIFT_LSL, 0);
}

// 16-bit no-op (MOV R0, R0).
int nop_16(CodeBuf *codebuf)
{
  return out_16(codebuf, T_MOV(ARM_R0, ARM_R0));
}

// 32-bit no-op (MOV R8, R8 in the wide encoding).
int nop_32(CodeBuf *codebuf)
{
  return dop_reg(codebuf, DP_MOV, ARM_R8, 0, ARM_R8, SHIFT_LSL, 0);
}

// Bitwise NOT: dst = ~src.
int mvn_reg(CodeBuf *codebuf, u32 dst, u32 src)
{
  if (dst < ARM_R8 && src < ARM_R8)
    return out_16(codebuf, T_MVN(dst, src));
  return dop_reg(codebuf, DP_MVN, dst, 0, src, SHIFT_LSL, 0);
}

// Move a single-precision value from an ARM core register into a VFP reg.
int vmov_reg_s_toVFP(CodeBuf *codebuf, u32 dst, u32 src)
{
  return out_16x2(codebuf, T_VMOVS_TOVFP(dst, src));
}

// Move a single-precision value from a VFP reg into an ARM core register.
int vmov_reg_s_toARM(CodeBuf *codebuf, u32 dst, u32 src)
{
  return out_16x2(codebuf, T_VMOVS_TOARM(dst, src));
}

// Move a register pair (lo/hi) into a VFP double register.
int vmov_reg_d_toVFP(CodeBuf *codebuf, u32 dst, u32 src_lo, u32 src_hi)
{
  return out_16x2(codebuf, T_VMOVD_TOVFP(dst, src_lo, src_hi));
}

// Copy one VFP double register to another.
int vmov_reg_d_VFP_to_VFP(CodeBuf *codebuf, u32 dst, u32 src)
{
  return out_16x2(codebuf, T_VMOVD_VFP_TOVFP(dst, src));
}

// Move a VFP double register into an ARM core register pair (lo/hi).
int vmov_reg_d_toARM(CodeBuf *codebuf, u32 dst_lo, u32 dst_hi, u32 src)
{
  return out_16x2(codebuf, T_VMOVD_TOARM(dst_lo, dst_hi, src));
}

// Single-precision VFP arithmetic: dst = lho <op> rho.
int vop_reg_s(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho)
{
  return out_16x2(codebuf, T_VOP_REG_S(VP_REG(op), dst, lho, rho));
}

// Double-precision VFP arithmetic: dst = lho <op> rho.
int vop_reg_d(CodeBuf *codebuf, u32 op, u32 dst, u32 lho, u32 rho)
{
  return out_16x2(codebuf, T_VOP_REG_D(VP_REG(op), dst, lho, rho));
}

// Single-precision compare; e selects the exception-raising (VCMPE) form.
int vcmp_reg_s(CodeBuf *codebuf, u32 lho, u32 rho, unsigned e)
{
  return out_16x2(codebuf, T_VCMP_S(lho, rho, e));
}

// Double-precision compare; e selects the exception-raising (VCMPE) form.
int vcmp_reg_d(CodeBuf *codebuf, u32 lho, u32 rho, unsigned e)
{
  return out_16x2(codebuf, T_VCMP_D(lho, rho, e));
}

// Transfer VFP status flags (FPSCR) to dst (APSR when dst is PC-encoded).
int vmrs(CodeBuf *codebuf, u32 dst)
{
  return out_16x2(codebuf, T_VMRS(dst));
}

// dst = lho + rho (flag-setting, via dop_reg).
int add_reg(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho)
{
  return dop_reg(codebuf, DP_ADD, dst, lho, rho, SHIFT_LSL, 0);
}
-
// Compare two registers (sets flags only).
int cmp_reg(CodeBuf *codebuf, Reg lho, Reg rho)
{
  if (lho < ARM_R8 && rho < ARM_R8)
    return out_16(codebuf, T_CMP_REG(lho, rho));
  return dop_reg(codebuf, DP_CMP, 0x0f, lho, rho, SHIFT_LSL, 0);
}

// dst = lho + (rho shifted).
// NOTE(review): sh_typ is declared `u2` here but `u32` elsewhere — confirm
// this narrowing is intentional.
int add_reg_shift(CodeBuf *codebuf, u32 dst, u32 lho, u32 rho, u2 sh_typ, u32 shift)
{
  return dop_reg(codebuf, DP_ADD, dst, lho, rho, sh_typ, shift);
}

// dst = src + imm, choosing the shortest encoding.  imm is treated as a
// two's-complement value: negative additions are emitted as subtractions
// (the `-imm < N` tests rely on unsigned wraparound, so they only match
// small negative values).  Falls back to loading imm into IP.
int add_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  int imm_type, rol;

  if (imm == 0) return mov_reg(codebuf, dst, src);
  if (Thumb2) {
    if (dst < ARM_R8 && src < ARM_R8) {
      if (imm < 8)
	return out_16(codebuf, T1_ADD_IMM(dst, src, imm));
      if (-imm < 8)
	return out_16(codebuf, T1_SUB_IMM(dst, src, -imm));
      if (src == dst) {
	if (imm < 256)
	  return out_16(codebuf, T2_ADD_IMM(src, imm));
	if (-imm < 256)
	  return out_16(codebuf, T2_SUB_IMM(src, -imm));
      }
    }
    // Thumb-2 "modified immediate": replicated byte lanes, then a single
    // rotated 8-bit value, for ADD and (negated) for SUB.
    imm_type = thumb_bytelane(imm);
    if (imm_type >= 0) {
      if (imm_type == 2) imm >>= 8;
      return out_16x2(codebuf, T3_ADD_BYTELANE(dst, src, imm_type, (imm & 0xff)));
    }
    imm_type = thumb_bytelane(-imm);
    if (imm_type >= 0) {
      imm = -imm;
      if (imm_type == 2) imm >>= 8;
      return out_16x2(codebuf, T3_SUB_BYTELANE(dst, src, imm_type, (imm & 0xff)));
    }
    rol = thumb_single_shift(imm);
    if (rol >= 0)
      return out_16x2(codebuf, T3_ADD_ROT_IMM(dst, src, rol, ROL(imm, rol)));
    rol = thumb_single_shift(-imm);
    if (rol >= 0)
      return out_16x2(codebuf, T3_SUB_ROT_IMM(dst, src, rol, ROL(-imm, rol)));
    // Plain 12-bit immediate forms (ADDW/SUBW).
    if (imm < (1 << 12))
      return out_16x2(codebuf, T4_ADD_IMM(dst, src, imm));
    if (-imm < (1 << 12))
      return out_16x2(codebuf, T4_SUB_IMM(dst, src, -imm));
    // Last resort: materialise imm in IP and add registers.
    mov_imm(codebuf, ARM_IP, imm);
    return add_reg(codebuf, dst, src, ARM_IP);
  }
  J_Unimplemented();
}

// dst = src - imm, by adding the negation.
int sub_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  return add_imm(codebuf, dst, src, -imm);
}
-
// Generic data-processing op with immediate: dst = src <op> imm, with the
// S bit supplied by the caller (s is either 0 or 1<<20).  Tries the
// modified-immediate encodings directly, then with the complementary op
// (e.g. ADD<->SUB, AND<->BIC via N_OP) and a complemented/negated
// immediate, then falls back to materialising imm in IP.
int dop_imm_s(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm, unsigned s)
{
    int imm_type, rol;
    unsigned n_op, n_imm;

    JASSERT(op == DP_ADC || op == DP_ADD || op == DP_AND || op == DP_BIC || op == DP_CMN ||
		op == DP_CMP || op == DP_EOR || op == DP_MOV || op == DP_MVN ||
		op == DP_ORN || op == DP_ORR || op == DP_RSB || op == DP_SBC ||
		op == DP_SUB || op == DP_TEQ || op == DP_TST, "bad op");
    // Compare/test ops have no destination; moves have no first operand.
    if (op == DP_CMP || op == DP_CMN || op == DP_TEQ || op == DP_TST) dst = 0x0f;
    if (op == DP_MOV || op == DP_MVN) src = 0x0f;
    imm_type = thumb_bytelane(imm);
    if (imm_type >= 0) {
      if (imm_type == 2) imm >>= 8;
      return out_16x2(codebuf, T_DOP_BYTELANE(DP_IMM(op)|s, dst, src, imm_type, (imm & 0xff)));
    }
    rol = thumb_single_shift(imm);
    if (rol >= 0)
      return out_16x2(codebuf, T_DOP_ROT_IMM(DP_IMM(op)|s, dst, src, rol, ROL(imm, rol)));
    n_op = N_OP(op);
    if (n_op != (unsigned)-1) {
      // Additive ops pair with the negated immediate; logical ops with the
      // complemented immediate.
      n_imm = ~imm;
      if (op == DP_ADD || op == DP_SUB || op == DP_CMP || op == DP_CMN) n_imm = -imm;
      imm_type = thumb_bytelane(n_imm);
      if (imm_type >= 0) {
	if (imm_type == 2) n_imm >>= 8;
	return out_16x2(codebuf, T_DOP_BYTELANE(DP_IMM(n_op)|s, dst, src, imm_type, (n_imm & 0xff)));
      }
      rol = thumb_single_shift(n_imm);
      if (rol >= 0)
	return out_16x2(codebuf, T_DOP_ROT_IMM(DP_IMM(n_op)|s, dst, src, rol, ROL(n_imm, rol)));
    }
    mov_imm(codebuf, ARM_IP, imm);
    return out_16x2(codebuf, T_DOP_REG(DP_REG(op)|s, dst, src, ARM_IP, SHIFT_LSL, 0));
}

// Flag-setting variant of dop_imm_s.
int dop_imm(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
{
    return dop_imm_s(codebuf, op, dst, src, imm, 1<<20);
}

// Flag-preserving variant of dop_imm_s.
int dop_imm_preserve(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
{
    return dop_imm_s(codebuf, op, dst, src, imm, 0);
}
-
// Shift by immediate: dst = src <op> (imm & 31).  A zero shift degenerates
// to a register move.
int shift_imm(CodeBuf *codebuf, u32 op, u32 dst, u32 src, u32 imm)
{
    imm &= 31;
    if (imm == 0)
      return mov_reg(codebuf, dst, src);
    else
      return out_16x2(codebuf, T_SHIFT_IMM(DP_IMM(op), dst, src, imm));
}

// dst = imm - src.  rsb with 0 is negation, which has a 16-bit encoding.
int rsb_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  if (dst < ARM_R8 && src < ARM_R8 && imm == 0)
    return out_16(codebuf, T_NEG(dst, src));
  return dop_imm(codebuf, DP_RSB, dst, src, imm);
}

// dst = src + imm + carry.
int adc_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  return dop_imm(codebuf, DP_ADC, dst, src, imm);
}

// Arithmetic shift right by immediate.
int asr_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  return shift_imm(codebuf, DP_ASR, dst, src, imm);
}

// dst = src ^ imm.
int eor_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  return dop_imm(codebuf, DP_EOR, dst, src, imm);
}

// dst = src & imm.
int and_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  return dop_imm(codebuf, DP_AND, dst, src, imm);
}

// dst = src | imm.
int orr_imm(CodeBuf *codebuf, u32 dst, u32 src, u32 imm)
{
  return dop_imm(codebuf, DP_ORR, dst, src, imm);
}

// Compare register against immediate (sets flags only).
int cmp_imm(CodeBuf *codebuf, Reg src, u32 imm)
{
  if (src < ARM_R8 && imm < 256) return out_16(codebuf, T_CMP_IMM(src, imm));
  return dop_imm(codebuf, DP_CMP, 0x0f, src, imm);
}

// Test register bits against immediate (sets flags only).
int tst_imm(CodeBuf *codebuf, Reg src, u32 imm)
{
  return dop_imm(codebuf, DP_TST, 0x0f, src, imm);
}
-}
-
// Emit a full data memory barrier (DMB SY).  Elided on uniprocessors.
void fullBarrier(CodeBuf *codebuf)
{
  if (os::is_MP())
    out_16x2(codebuf, T_DMB(0xf));
}

// Emit a store-ordering barrier (DMB ST).  Elided on uniprocessors.
void storeBarrier(CodeBuf *codebuf)
{
  if (os::is_MP())
    out_16x2(codebuf, T_DMB(0xe));
}
-
-int tbh(CodeBuf *codebuf, Reg base, Reg idx)
-{
-  out_16x2(codebuf, T_TBH(base, idx));
-}
-
// Unsigned 32x32->64 multiply: res_hi:res_lo = lho * rho.
int umull(CodeBuf *codebuf, u32 res_lo, u32 res_hi, u32 lho, u32 rho)
{
  return out_16x2(codebuf, T_UMULL(res_lo, res_hi, lho, rho));
}

// Multiply-accumulate: res = lho * rho + a.
int mla(CodeBuf *codebuf, u32 res, u32 lho, u32 rho, u32 a)
{
  return out_16x2(codebuf, T_MLA(res, lho, rho, a));
}
-
// JIT-internal condition-code indices.  Conditions are paired so that
// (cond ^ 1) is the logical negation — see NEG_COND below.
#define COND_EQ 0
#define COND_NE 1
#define COND_LT	2
#define COND_GE 3
#define COND_GT 4
#define COND_LE 5
#define COND_CS 6
#define COND_CC 7
#define COND_MI 8
#define COND_PL 9

// Map the indices above to the ARM condition-code nibbles used in the
// instruction encodings.
static unsigned conds[] = {
	0x0,
	0x1,
	0xb,
	0xa,
	0xc,
	0xd,
	0x2,
	0x3,
	0x4,
	0x5,
};

// Negate a JIT condition index (relies on the even/odd pairing above).
#define NEG_COND(cond)	((cond) ^ 1)

// Thumb/Thumb-2 branch instruction encodings.  `uoff` is the branch
// offset in halfwords.  The wide forms (T_BW/T_BL/T_BLX/T_BCCW) scatter
// the offset across the I1/I2/J1/J2 and imm10/imm11 fields, with the
// J bits XORed against the sign bit per the architecture encoding.
#define T_B(uoff)	(0xe000 | ((uoff) & 0x7ff))
#define T_BW(uoff)	(0xf0009000 | \
			  (((uoff) & (1<<23)) << (26-23)) | \
			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
			  (((uoff) & 0x1ff800) << (16-11)) | \
			  ((uoff) & 0x7ff))
#define T_BL(uoff)	(0xf000d000 | \
			  (((uoff) & (1<<23)) << (26-23)) | \
			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
			  (((uoff) & 0x1ff800) << (16-11)) | \
			  ((uoff) & 0x7ff))
#define T_BLX(uoff)	(0xf000c000 | \
			  (((uoff) & (1<<23)) << (26-23)) | \
			  (((~(uoff) & (1<<22)) >> 22) ^ (((uoff) & (1<<23)) >> 23)) << 13 | \
			  (((~(uoff) & (1<<21)) >> 21) ^ (((uoff) & (1<<23)) >> 23)) << 11 | \
			  (((uoff) & 0x1ff800) << (16-11)) | \
			  ((uoff) & 0x7ff))
#define T_BCC(cond, uoff) (0xd000 | (conds[cond] << 8) | ((uoff) & 0xff))
#define T_BCCW(cond, uoff) (0xf0008000 | \
			     (conds[cond] << 22) | \
			     (((uoff) & (1<<19)) << (26-19)) | \
			     (((uoff) & (1<<18)) >> (18-11)) | \
			     (((uoff) & (1<<17)) >> (17-13)) | \
			     (((uoff) & 0x1f800) << (16-11)) | \
			     ((uoff) & 0x7ff))
#define T_BLX_REG(r)	(0x4780 | ((r) << 3))
// Compare-and-branch-on-(non)zero; only low regs, forward offsets.
#define T_CBZ(r, uoff)	(0xb100 | (((uoff) & 0x1f) << 3) | (((uoff) & 0x20) << (8-5)) | ((r) & 7))
#define T_CBNZ(r, uoff)	(0xb900 | (((uoff) & 0x1f) << 3) | (((uoff) & 0x20) << (8-5)) | ((r) & 7))

// IT (if-then) block encoding.
#define T_IT(cond, mask) (0xbf00 | (conds[cond] << 4) | (mask))

// IT masks for a single conditional instruction (T) and a T,E,E pattern.
#define IT_MASK_T	8
#define IT_MASK_TEE	0x0e

// PATCH/HCTAP bracket code that rewinds the emission index to `loc`,
// re-emits over earlier output, then restores the index.
#define PATCH(loc)	do {						\
	  unsigned oldidx = codebuf->idx;				\
	  codebuf->idx = (loc) >> 1;					\

#define HCTAP								\
	  codebuf->idx = oldidx;					\
    	} while (0)
-
-int forward_16(CodeBuf *codebuf)
-{
-  int loc = out_loc(codebuf);
-  out_16(codebuf, T_UNDEFINED_16);
-  return loc;
-}
-
-int forward_32(CodeBuf *codebuf)
-{
-  int loc = out_loc(codebuf);
-  out_32(codebuf, T_UNDEFINED_32);
-  return loc;
-}
-
// Emit an IT (if-then) instruction for `cond` with then/else pattern
// `mask`.  The mask is expressed relative to the un-negated condition;
// for a negated condition (odd index) the encoding requires the pattern
// bits above the terminating 1 to be flipped.
int it(CodeBuf *codebuf, unsigned cond, unsigned mask)
{
  if (cond & 1) {
    // If this is a negated condition, flip all the bits above the
    // least significant bit that is 1.  Note that at least one bit is
    // always 1 in mask
    switch (mask & (-mask)) {
    case 8:
      break;
    case 4:
      mask ^= 8;
      break;
    case 2:
      mask ^= 0x0c;
      break;
    case 1:
      mask ^= 0x0e;
      break;
    default:
      // Impossible unless someone specified an incorrect mask
      longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
    }
  }

  return out_16(codebuf, T_IT(cond, mask));
}
-
// Pad with a NOP if the next instruction would start at the last halfword
// of a 4KB page (address ending 0xffe).
// NOTE(review): this looks like a workaround for a Thumb-2 branch erratum
// on page-crossing instructions (possibly Cortex-A8 erratum 657417) —
// confirm which erratum before relying on the exact conditions.
void t2_bug_align(CodeBuf *codebuf)
{
  unsigned pc = (unsigned)&codebuf->codebuf[codebuf->idx];
  if ((pc & 0xffe) != 0xffe) return;
  mov_reg(codebuf, ARM_R0, ARM_R0);
}

// As t2_bug_align, but only pads when the branch offset is a small
// negative value in the affected range (-4100 <= offset < 0).
void t2_bug_fix(CodeBuf *codebuf, int offset)
{
  unsigned pc = (unsigned)&codebuf->codebuf[codebuf->idx];
  if ((pc & 0xffe) != 0xffe) return;
  if (offset >= 0 || offset < -(4096+4)) return;
  mov_reg(codebuf, ARM_R0, ARM_R0);
}
-
// Emit an unconditional branch to byte address `dest`.  Offsets are
// computed in halfwords relative to PC (current location + 4).  Uses the
// 16-bit B when the offset fits in 11 bits, else the 32-bit B.W.
int branch_uncond(CodeBuf *codebuf, unsigned dest)
{
  unsigned loc = (codebuf->idx * 2) + 4;
  int offset;
  unsigned uoff;

  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest - loc;
  if (offset >= -(1<<10) && offset < (1<<10)) {
    uoff = offset;
    return out_16(codebuf, T_B(uoff));
  }
  t2_bug_fix(codebuf, offset);
  if (offset >= -(1<<23) && offset < (1<<23)) {
    uoff = offset;
    return out_16x2(codebuf, T_BW(uoff));
  }
  J_Unimplemented();
}

// Patch location `loc` (byte offset into the code buffer) with a 32-bit
// unconditional branch to `dest`, restoring the emission index afterwards.
int branch_uncond_patch(CodeBuf *codebuf, unsigned loc, unsigned dest)
{
  int offset;
  unsigned uoff;
  unsigned oldidx;
  int rc;

  oldidx = codebuf->idx;
  codebuf->idx = loc >> 1;
  loc += 4;              // branches are PC-relative: PC = loc + 4
  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest - loc;
  t2_bug_fix(codebuf, offset);
  if (offset >= -(1<<23) && offset < (1<<23)) {
    uoff = offset & ((1<<24)-1);
    rc = out_16x2(codebuf, T_BW(uoff));
    codebuf->idx = oldidx;
    return rc;
  }
  J_Unimplemented();
}

// Patch location `loc` with a 16-bit branch to the current emission point.
int branch_narrow_patch(CodeBuf *codebuf, unsigned loc)
{
  int offset;
  unsigned uoff;
  unsigned oldidx;
  unsigned dest;
  int rc;

  dest = codebuf->idx * 2;
  oldidx = codebuf->idx;
  codebuf->idx = loc >> 1;
  loc += 4;
  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest - loc;
  if (offset >= -(1<<10) && offset < (1<<10)) {
    uoff = offset & ((1<<11)-1);
    rc = out_16(codebuf, T_B(uoff));
    codebuf->idx = oldidx;
    return rc;
  }
  J_Unimplemented();
}

// Emit a conditional branch to `dest`.  16-bit form for 8-bit offsets,
// 32-bit B<cc>.W for 20-bit offsets.
int branch(CodeBuf *codebuf, unsigned cond, unsigned dest)
{
  unsigned loc = (codebuf->idx * 2) + 4;
  int offset;
  unsigned uoff;

  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest - loc;
  if (offset >= -(1<<7) && offset < (1<<7)) {
    uoff = offset;
    return out_16(codebuf, T_BCC(cond, uoff));
  }
  t2_bug_fix(codebuf, offset);
  if (offset >= -(1<<19) && offset < (1<<19)) {
    uoff = offset;
    return out_16x2(codebuf, T_BCCW(cond, uoff));
  }
  J_Unimplemented();
}
-
// Patch location `loc` with a 16-bit conditional branch to the current
// emission point.
int bcc_patch(CodeBuf *codebuf, unsigned cond, unsigned loc)
{
  int offset;
  unsigned uoff;
  unsigned oldidx;
  unsigned dest;
  int rc;

  dest = codebuf->idx * 2;
  oldidx = codebuf->idx;
  codebuf->idx = loc >> 1;
  loc += 4;
  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest-loc;
  if (offset >= -(1<<7) && offset < (1<<7)) {
    uoff = offset;
    rc = out_16(codebuf, T_BCC(cond, uoff));
    codebuf->idx = oldidx;
    return rc;
  }
  J_Unimplemented();
}

// Emit a BL (branch with link) to absolute byte address `dest`.  Note the
// offset here is computed from the buffer's absolute address, not its
// index, since `dest` is an absolute address (e.g. a handler).
int bl(CodeBuf *codebuf, unsigned dest)
{
  unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 4;
  int offset;
  unsigned uoff;

  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest - loc;
  t2_bug_fix(codebuf, offset);
  if (offset >= -(1<<23) && offset < (1<<23)) {
    uoff = offset;
    return out_16x2(codebuf, T_BL(uoff));
  }
  J_Unimplemented();
}

// Emit a BLX to absolute ARM-state address `dest` (must be word-aligned;
// the PC base is word-aligned too, per the BLX encoding).
int blx(CodeBuf *codebuf, unsigned dest)
{
  unsigned loc = (unsigned)&codebuf->codebuf[codebuf->idx] + 4;
  int offset;
  unsigned uoff;

  JASSERT((dest & 3) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  loc &= ~1;
  offset = dest - loc;
  t2_bug_fix(codebuf, offset);
  if (offset >= -(1<<23) && offset < (1<<23)) {
    uoff = offset;
    return out_16x2(codebuf, T_BLX(uoff));
  }
  J_Unimplemented();
}

// Patch location `loc` with a 32-bit conditional branch to `dest`.
int branch_patch(CodeBuf *codebuf, unsigned cond, unsigned loc, unsigned dest)
{
  int offset;
  unsigned uoff;
  unsigned oldidx;
  int rc;

  oldidx = codebuf->idx;
  codebuf->idx = loc >> 1;
  loc += 4;
  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest - loc;
  t2_bug_fix(codebuf, offset);
  if (offset >= -(1<<19) && offset < (1<<19)) {
    uoff = offset & ((1<<20)-1);
    rc = out_16x2(codebuf, T_BCCW(cond, uoff));
    codebuf->idx = oldidx;
    return rc;
  }
  J_Unimplemented();
}

// Branch with link to the address in register r.
int blx_reg(CodeBuf *codebuf, Reg r)
{
  return out_16(codebuf, T_BLX_REG(r));
}
-
// Patch location `loc` with a CBZ (compare and branch if zero) on `r` to
// the current emission point.  CBZ only supports low registers and small
// forward offsets.
int cbz_patch(CodeBuf *codebuf, Reg r, unsigned loc)
{
  unsigned offset;
  unsigned oldidx;
  unsigned dest;
  int rc;

  dest = codebuf->idx * 2;
  oldidx = codebuf->idx;
  codebuf->idx = loc >> 1;
  loc += 4;
  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest-loc;
  if (r < ARM_R8 && offset < 64) {
    rc = out_16(codebuf, T_CBZ(r, offset));
    codebuf->idx = oldidx;
    return rc;
  }
  J_Unimplemented();
}

// As cbz_patch, but branches if `r` is non-zero (CBNZ).
int cbnz_patch(CodeBuf *codebuf, Reg r, unsigned loc)
{
  unsigned offset;
  unsigned oldidx;
  unsigned dest;
  int rc;

  dest = codebuf->idx * 2;
  oldidx = codebuf->idx;
  codebuf->idx = loc >> 1;
  loc += 4;
  JASSERT((dest & 1) == 0 && (loc & 1) == 0, "unaligned code");
  dest >>= 1;
  loc >>= 1;
  offset = dest-loc;
  if (r < ARM_R8 && offset < 64) {
    rc = out_16(codebuf, T_CBNZ(r, offset));
    codebuf->idx = oldidx;
    return rc;
  }
  J_Unimplemented();
}
-
-int chka(CodeBuf *codebuf, u32 size, u32 idx)
-{
-  cmp_reg(codebuf, idx, size);
-  it(codebuf, COND_CS, IT_MASK_T);
-  bl(codebuf, handlers[H_ARRAYBOUND]);
-}
-
-//-----------------------------------------------------------------------------------
-
-// An example of some debugging logic that you can use to trigger a
-// breakpoint when a particular method is executing.
// Prefix match helper: true iff S1 is non-null and starts with S2.
#define EQ(S1, S2) (S1 && (strncmp(S1, S2, strlen(S2)) == 0))
// Debug hook callable from generated code or a debugger: put a breakpoint
// on the `asm("nop")` to stop when the named method is executing.
extern "C" void Debug(interpreterState istate)
{
  char valuebuf[8192];
  istate->method()->name_and_sig_as_C_string(valuebuf, sizeof valuebuf);
  if (EQ(valuebuf, "java.util.Hashtable.get(Ljava/lang/Object;)")
      // && istate->method()->bci_from(istate->bcp()) == 45
      ) {
    asm("nop");
  }
}
#undef EQ
-
// Push `nregs` registers onto the Java expression stack (grows downward
// from Rstack).  STM stores lowest-numbered register at the lowest
// address, so registers must be pushed in batches whenever the register
// numbers are not strictly descending in memory order; each batch is one
// STMDB (PUSH_FD) with writeback.
void Thumb2_Push_Multiple(CodeBuf *codebuf, Reg *regs, unsigned nregs)
{
  unsigned regset = 0;
  unsigned regmask;
  unsigned i;
  Reg r;

  JASSERT(nregs > 0, "nregs must be > 0");
  if (nregs == 1) {
    // Single register: pre-decrement store.
    str_imm(codebuf, regs[0], Rstack, -4, 1, 1);
    return;
  }
  for (i = 0; i < nregs; i++) {
    r = regs[i];
    if (!IS_ARM_INT_REG(r)) J_Unimplemented();
    regmask = 1<<r;
    // Flush the batch if this register wouldn't keep the set ordered.
    if (regset != 0 && regmask >= (regset & -regset)) {
      stm(codebuf, regset, Rstack, PUSH_FD, 1);
      regset = 0;
    }
    regset |= regmask;
  }
  stm(codebuf, regset, Rstack, PUSH_FD, 1);
}

// Pop `nregs` registers off the Java expression stack (mirror of
// Thumb2_Push_Multiple): walk the list backwards, batching into LDM
// (POP_FD) loads whenever register numbers stay ascending.
void Thumb2_Pop_Multiple(CodeBuf *codebuf, Reg *regs, unsigned nregs)
{
  unsigned regset = 0;
  unsigned regmask;
  unsigned i;
  Reg r;

  if (nregs == 0)
    return;
  JASSERT(nregs > 0, "nregs must be > 0");
  if (nregs == 1) {
    // Single register: post-increment load.
    ldr_imm(codebuf, regs[0], Rstack, 4, 0, 1);
    return;
  }
  i = nregs;
  do {
    i--;
    r = regs[i];
    if (!IS_ARM_INT_REG(r)) J_Unimplemented();
    regmask = 1<<r;
    // Flush the batch if this register wouldn't keep the set ordered.
    if (regmask <= (regset & -regset)) {
      ldm(codebuf, regset, Rstack, POP_FD, 1);
      regset = 0;
    }
    regset |= regmask;
  } while (i > 0);
  ldm(codebuf, regset, Rstack, POP_FD, 1);
}
-
// Perform a parallel move dst[i] = src[i] for nregs register pairs,
// emitting individual MOVs in an order that never overwrites a source
// that is still needed.  Cycles are broken by routing one value through
// IP, so neither list may contain IP.  dst[]/src[] are used as scratch
// and are modified.
int mov_multiple(CodeBuf *codebuf, Reg *dst, Reg *src, unsigned nregs)
{
  unsigned u, n, p;
  unsigned smask = 0;
  unsigned dmask = 0;
  unsigned free_mask, free_reg;

  // Drop the no-op moves (dst == src), compacting both arrays.
  for (u = 0, n = 0; u < nregs; u++) {
    JASSERT(dst[u] != ARM_IP, "mov_multiple cannot be used for ARM_IP");
    JASSERT(src[u] != ARM_IP, "mov_multiple cannot be used for ARM_IP");
    if (dst[u] != src[u]) {
      dst[n] = dst[u];
      src[n++] = src[u];
    }
  }
  while (n) {
    // Find a reg which is in the dst reg set but not the src reg set
    smask = 0;
    dmask = 0;
    for (u = 0; u < n; u++) {
      smask |= (1 << src[u]);
      dmask |= (1 << dst[u]);
    }
    free_mask = dmask & ~smask;
    if (!free_mask) {
      // No such reg => must use IP
      Reg r = dst[0];
      mov_reg(codebuf, ARM_IP, r);
      for (u = 0; u < n; u++) {
	if (src[u] == r) src[u] = ARM_IP;
      }
      smask ^= (1<<r) | (1<<ARM_IP);
      free_mask = dmask & ~smask;
      JASSERT(free_mask, "still no free reg after using ARM_IP?");
    }
    // Emit the move into the free destination and drop it from the lists.
    free_reg = LOG2(free_mask);
    for (u = 0, p = 0; u < n; u++) {
      if (dst[u] == free_reg) {
	mov_reg(codebuf, dst[u], src[u]);
      } else {
	dst[p] = dst[u];
	src[p++] = src[u];
      }
    }
    n--;
  }
  return 0;
}
-
// Accessors for the simulated Java expression stack: top of stack and the
// three slots below it.
#define TOS(jstack)	((jstack)->stack[(jstack)->depth-1])
#define TOSM1(jstack)	((jstack)->stack[(jstack)->depth-2])
#define TOSM2(jstack)	((jstack)->stack[(jstack)->depth-3])
#define TOSM3(jstack)	((jstack)->stack[(jstack)->depth-4])

// Exchange the top two stack slots (registers only; emits no code).
#define SWAP(jstack) do { \
		      Reg r = (jstack)->stack[(jstack)->depth-1]; \
		      (jstack)->stack[(jstack)->depth-1] = (jstack)->stack[(jstack)->depth-2]; \
		      (jstack)->stack[(jstack)->depth-2] = r; \
		    } while (0)

// Allocate a free stack register (optionally preferring a given set).
#define JSTACK_REG(jstack)		jstack_reg(jstack)
#define JSTACK_PREFER(jstack, prefer)	jstack_prefer(jstack, prefer)
-
-int PUSH(Thumb2_Stack *jstack, unsigned reg) {
-  jstack->stack[jstack->depth] = reg;
-  jstack->depth++;
-  return reg;
-}
-
-int POP(Thumb2_Stack *jstack) {
-  jstack->depth--;
-  return jstack->stack[jstack->depth];
-}
-
-static const unsigned last_clear_bit[] = {
-	3,	//	0000
-	3,	//	0001
-	3,	//	0010
-	3,	//	0011
-	3,	//	0100
-	3,	//	0101
-	3,	//	0110
-	3,	//	0111
-	2,	//	1000
-	2,	//	1001
-	2,	//	1010
-	2,	//	1011
-	1,	//	1100
-	1,	//	1101
-	0,	//	1110
-	0,	//	1111 // No registers available...
-};
-
-#define LAST_CLEAR_BIT(mask) last_clear_bit[mask]
-
-unsigned long thumb2_register_allocation_failures = 0;
-
-unsigned jstack_reg(Thumb2_Stack *jstack)
-{
-  unsigned *stack = jstack->stack;
-  unsigned depth = jstack->depth;
-  unsigned mask = 0;
-  unsigned r;
-  unsigned i;
-
-  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
-  mask &= (1 << STACK_REGS) - 1;
-  if (mask >= (1 << STACK_REGS) - 1)  { // No free registers
-    thumb2_register_allocation_failures++;
-    J_BogusImplementation();
-  }
-  r = LAST_CLEAR_BIT(mask);
-  return r;
-}
-
-unsigned jstack_prefer(Thumb2_Stack *jstack, Reg prefer)
-{
-  unsigned *stack = jstack->stack;
-  unsigned depth = jstack->depth;
-  unsigned mask = 0;
-  unsigned r;
-  unsigned i;
-
-  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
-  mask &= (1 << STACK_REGS) - 1;
-  if ((prefer & ~mask) & 0x0f) mask |= (~prefer & ((1 << STACK_REGS) - 1));
-  if (mask >= (1 << STACK_REGS) - 1)  { // No free registers
-    thumb2_register_allocation_failures++;
-    J_BogusImplementation();
-  }
-  r = LAST_CLEAR_BIT(mask);
-  return r;
-}
-
-void Thumb2_Fill(Thumb2_Info *jinfo, unsigned required)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned *stack = jstack->stack;
-  unsigned depth = jstack->depth;
-  unsigned mask = 0;
-  unsigned tofill;
-  unsigned r, i;
-
-  if (depth >= required) return;
-  tofill = required - depth;
-  for (i = depth; i > 0;) {
-    i--;
-    mask |= 1 << stack[i];
-    stack[i+tofill] = stack[i];
-  }
-  mask &= (1 << STACK_REGS) - 1;
-  for (i = 0; i < tofill; i++) {
-    JASSERT(mask != (1 << STACK_REGS) - 1, "Fill failed!!!");
-    r = LAST_CLEAR_BIT(mask);
-    mask |= (1 << r);
-    stack[i] = r;
-  }
-  jstack->depth = depth + tofill;
-  Thumb2_Pop_Multiple(jinfo->codebuf, stack, tofill);
-}
-
-static const unsigned bitcount[] = {
-	0,	// 0000
-	1,	// 0001
-	1,	// 0010
-	2,	// 0011
-	1,	// 0100
-	2,	// 0101
-	2,	// 0110
-	3,	// 0111
-	1,	// 1000
-	2,	// 1001
-	2,	// 1010
-	3,	// 1011
-	2,	// 1100
-	3,	// 1101
-	3,	// 1110
-	4,	// 1111
-};
-
-#define BITCOUNT(mask) bitcount[mask]
-
-// Thumb2_Spill:-
-// 	required - ensure that at least this many registers are available
-// 	exclude - bitmask, do not count these registers as available
-//
-// 	The no. of available regs (STACK_REGS) less the no. of registers in
-// 	exclude must be >= the number required, otherwise this function loops!
-//
-// 	Typical usage is
-//
-// 	Thumb2_Spill(jinfo, 2, 0);	// get 2 free regs
-// 	r_res_lo = PUSH(jinfo->jstack, JSTACK_REG(jinfo->jstack));
-// 	r_res_hi = PUSH(jinfo->jstack, JSTACK_REG(jinfo->jstack));
-//
-//	Use the exclude mask when you do not want a subsequent call to
-//	JSTACK_REG to return a particular register or registers. This can
-//	be useful, for example, with long (64) bit operations. Eg. In the
-//	following we use it to ensure that the hi inputs are not clobbered
-//	by the lo result as part of the intermediate calculation.
-//
-//	Thumb2_Fill(jinfo, 4);
-//	exclude = (1<<rho_hi)|(1<<lho_hi);
-//	rho_lo = POP(jstack);
-//	rho_hi = POP(jstack);
-//	lho_lo = POP(jstack);
-//	lho_hi = POP(jstack);
-//	Thumb2_Spill(jinfo, 2, exclude);
-//	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~exclude));	// != rho_hi or lho_hi
-//	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~exclude));	// != rho_hi or lho_hi
-//	dop_reg(jinfo->codebuf, DP_ADD, res_lo, lho_lo, rho_lo, SHIFT_LSL, 0); 
-//	dop_reg(jinfo->codebuf, DP_ADC, res_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
-//	
-void Thumb2_Spill(Thumb2_Info *jinfo, unsigned required, unsigned exclude)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned *stack = jstack->stack;
-  unsigned depth = jstack->depth;
-  unsigned mask;
-  unsigned i;
-  unsigned tospill = 0;
-
-  exclude &= (1 << STACK_REGS) - 1;
-  if (depth <= (STACK_REGS - required) && exclude == 0) return;
-  while (1) {
-    mask = 0;
-    for (i = tospill; i < depth; i++) mask |= 1 << stack[i];
-    mask &= ((1 << STACK_REGS) - 1);
-    mask |= exclude;
-    if (STACK_REGS - BITCOUNT(mask) >= required) break;
-    tospill++;
-  }
-  if (tospill == 0) return;
-  Thumb2_Push_Multiple(jinfo->codebuf, stack, tospill);
-  for (i = tospill; i < depth; i++)
-    stack[i-tospill] = stack[i];
-  jstack->depth = depth - tospill;
-  JASSERT((int)jstack->depth >= 0, "Stack underflow");
-}
-
-// Thumb2_Tmp:-
-// 	Allocate a temp reg for use in local code generation.
-// 	exclude is a bit mask of regs not to use.
-// 	A max of 2 regs can be guaranteed (ARM_IP & ARM_LR)
-// 	If allocating 2 regs you must include the reg you got the
-// 	first time in the exclude list. Otherwise you just get
-// 	the same reg again.
-Reg Thumb2_Tmp(Thumb2_Info *jinfo, unsigned exclude)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned *stack = jstack->stack;
-  unsigned depth = jstack->depth;
-  unsigned mask;
-  unsigned i;
-
-  mask = 0;
-  for (i = 0; i < depth; i++) mask |= 1 << stack[i];
-  mask |= exclude;
-  for (i = 0; i < STACK_REGS; i++)
-    if ((mask & (1<<i)) == 0) return i;
-  if ((mask & (1<<ARM_IP)) == 0) return ARM_IP;
-  if ((mask & (1<<ARM_LR)) == 0) return ARM_LR;
-  JASSERT(0, "failed to allocate a tmp reg");
-}
-
-void Thumb2_Flush(Thumb2_Info *jinfo)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-
-  if (jstack->depth > 0)
-    Thumb2_Push_Multiple(jinfo->codebuf, jstack->stack, jstack->depth);
-  jstack->depth = 0;
-}
-
-// SAVE_STACK and RESTORE_STACK save the stack state so that it's
-// possible to do a stack flush to memory and restore that stack state
-// to the same registers.
-#define SAVE_STACK(JSTACK)					\
-  unsigned saved_stack_elements[JSTACK->depth];			\
-  unsigned saved_stack_depth;					\
-  memcpy(saved_stack_elements, JSTACK->stack,			\
-	 JSTACK->depth * sizeof saved_stack_elements[0]);	\
-  saved_stack_depth = JSTACK->depth;
-#define RESTORE_STACK(JSTACK, CODEBUF)					\
-  Thumb2_Pop_Multiple(CODEBUF, saved_stack_elements, saved_stack_depth); \
-  memcpy(JSTACK->stack, saved_stack_elements,				\
-	 JSTACK->depth * sizeof saved_stack_elements[0]);		\
-  JSTACK->depth = saved_stack_depth;
-
-// Call this when we are about to corrupt a local
-// The local may already be on the stack
-// For example
-// 	iload	0
-// 	iconst	2
-// 	istore	0
-// 	istore	1
-// Without this check the code generated would be (r4 is local 0, r5 is local 1)
-// 	mov	r4, #2
-//	mov	r5, r4
-// With this check the code should be
-// 	mov	r3, r4
-// 	mov	r4, #2
-// 	mov	r5, r3
-// This is not ideal, but is better than the previous:-)
-//
-void Thumb2_Corrupt(Thumb2_Info *jinfo, unsigned r, unsigned ignore)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned *stack = jstack->stack;
-  unsigned depth = jstack->depth;
-  unsigned r_new, mask;
-  unsigned i;
-
-  if (ignore >= depth) return;
-//  JASSERT(depth >= ignore, "Cant ignore more than the whole stack!!");
-  if (IS_SREG(r)) return;
-  depth -= ignore;
-  for (i = 0; i < depth; i++) {
-    if (r == stack[i]) {
-      Thumb2_Spill(jinfo, 1, 0);
-      depth = jstack->depth - ignore;
-      r_new = JSTACK_REG(jstack);
-      mov_reg(jinfo->codebuf, r_new, r);
-      for (i = 0; i < depth; i++) if (r == stack[i]) stack[i] = r_new;
-      break;
-    }
-  }
-}
-
-unsigned Thumb2_ResultLocal(Thumb2_Info *jinfo, unsigned bci)
-{
-  unsigned opc = jinfo->code_base[bci];
-  if (jinfo->bc_stackinfo[bci] & BC_BRANCH_TARGET) return 0;
-  if (opc < opc_istore || opc > opc_astore_3) return 0;
-  if (opc == opc_istore || opc == opc_fstore || opc == opc_astore)
-    return jinfo->jregs->r_local[jinfo->code_base[bci+1]];
-  if ((opc >= opc_istore_0 && opc <= opc_istore_3) ||
-	(opc >= opc_fstore_0 && opc <= opc_fstore_3) ||
-	(opc >= opc_astore_0 && opc <= opc_astore_3))
-    return jinfo->jregs->r_local[(opc-opc_istore_0)&3];
-  return 0;
-}
-
-static const unsigned char dOps[] = {
-	DP_ADD, DP_ADC, VP_ADD, VP_ADD,
-	DP_SUB, DP_SBC, VP_SUB, VP_SUB,
-	DP_MUL, 0, VP_MUL, VP_MUL,
-	0, 0, VP_DIV, VP_DIV,
-	0, 0, 0, 0,
-	0, 0, 0, 0,
-	DP_LSL, 0,
-	DP_ASR, 0,
-	DP_LSR, 0,
-	DP_AND, DP_AND, DP_ORR, DP_ORR, DP_EOR, DP_EOR,
-};
-
-unsigned Thumb2_Imm(Thumb2_Info *jinfo, unsigned imm, unsigned next_bci)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r;
-  unsigned next_op;
-
-  if (!(jinfo->bc_stackinfo[next_bci] & BC_BRANCH_TARGET)) {
-    next_op = jinfo->code_base[next_bci];
-    if (next_op > OPC_LAST_JAVA_OP) {
-      if (Bytecodes::is_defined((Bytecodes::Code)next_op))
-	next_op = (unsigned)Bytecodes::java_code((Bytecodes::Code)next_op);
-    }
-    switch (next_op) {
-      case opc_istore:
-      case opc_fstore:
-      case opc_astore: {
-	unsigned local = jinfo->code_base[next_bci+1];
-	r = jinfo->jregs->r_local[local];
-	if (r) {
-	  Thumb2_Corrupt(jinfo, r, 0);
-	  mov_imm(jinfo->codebuf, r, imm);
-	  return 2;
-	}
-	break;
-      }
-      case opc_istore_0:
-      case opc_istore_1:
-      case opc_istore_2:
-      case opc_istore_3:
-      case opc_fstore_0:
-      case opc_fstore_1:
-      case opc_fstore_2:
-      case opc_fstore_3:
-      case opc_astore_0:
-      case opc_astore_1:
-      case opc_astore_2:
-      case opc_astore_3: {
-	unsigned local = (jinfo->code_base[next_bci]-opc_istore_0) & 3;
-	r = jinfo->jregs->r_local[local];
-	if (r) {
-	  Thumb2_Corrupt(jinfo, r, 0);
-	  mov_imm(jinfo->codebuf, r, imm);
-	  return 1;
-	}
-	break;
-      }
-      case opc_iadd:
-      case opc_isub:
-      case opc_ishl:
-      case opc_ishr:
-      case opc_iushr:
-      case opc_iand:
-      case opc_ior:
-      case opc_ixor: {
-	unsigned len = 0;
-	unsigned r_lho;
-
-	Thumb2_Fill(jinfo, 1);
-	r_lho = POP(jstack);
-
-	r = Thumb2_ResultLocal(jinfo, next_bci+1);
-	if (r) {
-	  Thumb2_Corrupt(jinfo, r, 0);
-	  len = Bytecodes::length_for((Bytecodes::Code)jinfo->code_base[next_bci+1]);
-	} else {
-	  Thumb2_Spill(jinfo, 1, 0);
-	  r = JSTACK_REG(jstack);
-	  PUSH(jstack, r);
-	}
-	if (next_op == opc_ishl || next_op == opc_ishr || next_op == opc_iushr)
-	  shift_imm(jinfo->codebuf, dOps[next_op-opc_iadd], r, r_lho, imm);
-	else
-	  dop_imm(jinfo->codebuf, dOps[next_op-opc_iadd], r, r_lho, imm);
-	return 1+len;
-      }
-
-      case opc_idiv: {
-	unsigned len = 0;
-	unsigned r_lho;
-	unsigned abs_imm = abs((int)imm);
-
-	if ((imm & -imm) == abs_imm) {
-	  unsigned l2_imm = LOG2(abs_imm);
-	  unsigned r_lho;
-
-	  if (imm == 0) break;
-	  if (imm == 1) return 1;
-
-	  Thumb2_Fill(jinfo, 1);
-	  r_lho = POP(jstack);
-
-	  r = Thumb2_ResultLocal(jinfo, next_bci+1);
-	  if (r) {
-	    Thumb2_Corrupt(jinfo, r, 0);
-	    len = Bytecodes::length_for((Bytecodes::Code)jinfo->code_base[next_bci+1]);
-	  } else {
-	    Thumb2_Spill(jinfo, 1, 0);
-	    r = JSTACK_REG(jstack);
-	    PUSH(jstack, r);
-	  }
-
-	  if (abs_imm != 1) {
-	    unsigned r_tmp = r_lho;
-	    if (abs_imm != 2) {
-	      r_tmp = Thumb2_Tmp(jinfo, (1<<r_lho));
-	      asr_imm(jinfo->codebuf, r_tmp, r_lho, 31);
-	    }
-	    add_reg_shift(jinfo->codebuf, r, r_lho, r_tmp, SHIFT_LSR, 32-l2_imm);
-	    asr_imm(jinfo->codebuf, r, r, l2_imm);
-	  }
-	  if ((int)imm < 0)
-	    rsb_imm(jinfo->codebuf, r, r, 0);
-	  return 1+len;
-	}
-	break;
-      }
-    }
-  }
-  Thumb2_Spill(jinfo, 1, 0);
-  r = JSTACK_REG(jstack);
-  PUSH(jstack, r);
-  mov_imm(jinfo->codebuf, r, imm);
-  return 0;
-}
-
-void Thumb2_ImmX2(Thumb2_Info *jinfo, unsigned lo, unsigned hi)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi;
-
-  Thumb2_Spill(jinfo, 2, 0);
-  r_hi = PUSH(jstack, JSTACK_REG(jstack));
-  r_lo = PUSH(jstack, JSTACK_REG(jstack));
-  mov_imm(jinfo->codebuf, r_lo, lo);
-  mov_imm(jinfo->codebuf, r_hi, hi);
-}
-
-#define LOCAL_OFFSET(local, stackdepth, nlocals) ((stackdepth)*4 + FRAME_SIZE + ((nlocals)-1-(local))*4)
-#define ISTATE_REG(jinfo)	  ((jinfo)->use_istate ? Ristate : Rstack)
-#define ISTATE(jinfo, stackdepth) ((jinfo)->use_istate ? 0 : (((stackdepth)-(jinfo)->jstack->depth)*4))
-#define ISTATE_OFFSET(jinfo, stackdepth, offset) (ISTATE(jinfo, stackdepth) + (offset))
-
-void load_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  if (jinfo->use_istate)
-    ldr_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4, 1, 0);
-  else
-    ldr_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);
-}
-
-void store_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  if (jinfo->use_istate)
-    str_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4, 1, 0);
-  else
-    str_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);
-}
-
-void load_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
-{
-  if (jinfo->use_istate)
-    ldr_imm(jinfo->codebuf, r, Ristate, istate_offset, 1, 0);
-  else
-    ldr_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset), 1, 0);
-}
-
-void store_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)
-{
-  if (jinfo->use_istate)
-    str_imm(jinfo->codebuf, r, Ristate, istate_offset, 1, 0);
-  else
-    str_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset), 1, 0);
-}
-
-void Thumb2_Load(Thumb2_Info *jinfo, int local, unsigned stackdepth)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r;
-
-  r = jinfo->jregs->r_local[local];
-  if (r) {
-    PUSH(jstack, r);
-  } else {
-    int nlocals = jinfo->method->max_locals();
-
-    Thumb2_Spill(jinfo, 1, 0);
-    JASSERT(stackdepth >= jstack->depth, "negative stack offset?");
-    stackdepth -= jstack->depth;
-    r = JSTACK_REG(jstack);
-    PUSH(jstack, r);
-    load_local(jinfo, r, local, stackdepth);
-  }
-}
-
-void Thumb2_LoadX2(Thumb2_Info *jinfo, int local, unsigned stackdepth)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi;
-  int nlocals = jinfo->method->max_locals();
-
-  r_hi = jinfo->jregs->r_local[local];
-  if (r_hi) {
-    r_lo = jinfo->jregs->r_local[local+1];
-    if (r_lo) {
-      PUSH(jstack, r_hi);
-      PUSH(jstack, r_lo);
-    } else {
-      Thumb2_Spill(jinfo, 1, 0);
-      stackdepth -= jstack->depth;
-      PUSH(jstack, r_hi);
-      r_lo = PUSH(jstack, JSTACK_REG(jstack));
-      load_local(jinfo, r_lo, local+1, stackdepth);
-    }
-  } else {
-    r_lo = jinfo->jregs->r_local[local+1];
-    if (r_lo) {
-      Thumb2_Spill(jinfo, 1, 0);
-      stackdepth -= jstack->depth;
-      r_hi = PUSH(jstack, JSTACK_REG(jstack));
-      load_local(jinfo, r_hi, local, stackdepth);
-      PUSH(jstack, r_lo);
-    } else {
-      Thumb2_Spill(jinfo, 2, 0);
-      stackdepth -= jstack->depth;
-      r_hi = PUSH(jstack, JSTACK_REG(jstack));
-      r_lo = PUSH(jstack, JSTACK_REG(jstack));
-      load_local(jinfo, r_hi, local, stackdepth);
-      load_local(jinfo, r_lo, local+1, stackdepth);
-    }
-  }
-}
-
-void Thumb2_Store(Thumb2_Info *jinfo, int local, unsigned stackdepth)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r, r_local;
-  int nlocals = jinfo->method->max_locals();
-
-  Thumb2_Fill(jinfo, 1);
-  stackdepth -= jstack->depth;
-  r = POP(jstack);
-  r_local = jinfo->jregs->r_local[local];
-  if (r_local) {
-    Thumb2_Corrupt(jinfo, r_local, 0);
-    mov_reg(jinfo->codebuf, r_local, r);
-  } else {
-    store_local(jinfo, r, local, stackdepth);
-  }
-}
-
-void Thumb2_StoreX2(Thumb2_Info *jinfo, int local, unsigned stackdepth)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi;
-  unsigned r_local_lo, r_local_hi;
-  int nlocals = jinfo->method->max_locals();
-
-  Thumb2_Fill(jinfo, 2);
-  r_lo = POP(jstack);
-  r_hi = POP(jstack);
-  stackdepth -= 2;
-
-  r_local_hi = jinfo->jregs->r_local[local];
-  if (r_local_hi) {
-    Thumb2_Corrupt(jinfo, r_local_hi, 0);
-    mov_reg(jinfo->codebuf, r_local_hi, r_hi);
-  } else {
-    store_local(jinfo, r_hi, local, stackdepth-jstack->depth);
-  }
-
-  r_local_lo = jinfo->jregs->r_local[local+1];
-  if (r_local_lo) {
-    Thumb2_Corrupt(jinfo, r_local_lo, 0);
-    mov_reg(jinfo->codebuf, r_local_lo, r_lo);
-  } else {
-    store_local(jinfo, r_lo, local+1, stackdepth-jstack->depth);
-  }
-}
-
-void Thumb2_Xaload(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_index, r_array, r_value;
-  unsigned op = opc - (unsigned)opc_iaload;
-  unsigned r_tmp;
-
-  Thumb2_Fill(jinfo, 2);
-  r_index = POP(jstack);
-  r_array = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index));
-  r_value = JSTACK_REG(jstack);
-  PUSH(jstack, r_value);
-  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8, 1, 0);
-  chka(jinfo->codebuf, r_tmp, r_index);
-  if (opc == opc_baload) {
-    add_reg(jinfo->codebuf, r_tmp, r_array, r_index);
-    ldrsb_imm(jinfo->codebuf, r_value, r_tmp, 12, 1, 0);
-  } else if (opc == opc_caload) {
-    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
-    ldrh_imm(jinfo->codebuf, r_value, r_tmp, 12, 1, 0);
-  } else if (opc == opc_saload) {
-    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
-    ldrsh_imm(jinfo->codebuf, r_value, r_tmp, 12, 1, 0);
-  } else {
-    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 2);
-    ldr_imm(jinfo->codebuf, r_value, r_tmp, 12, 1, 0);
-  }
-}
-
-void Thumb2_X2aload(Thumb2_Info *jinfo)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_index, r_array, r_lo, r_hi;
-  unsigned r_tmp;
-
-  Thumb2_Fill(jinfo, 2);
-  r_index = POP(jstack);
-  r_array = POP(jstack);
-  Thumb2_Spill(jinfo, 2, 0);
-  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index));
-  r_hi = PUSH(jstack, JSTACK_REG(jstack));
-  r_lo = PUSH(jstack, JSTACK_REG(jstack));
-  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8, 1, 0);
-  chka(jinfo->codebuf, r_tmp, r_index);
-  add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 3);
-  ldrd_imm(jinfo->codebuf, r_lo, r_hi, r_tmp, 16, 1, 0);
-}
-
-void Thumb2_Xastore(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_value, r_index, r_array;
-  unsigned op = opc - (unsigned)opc_iastore;
-  unsigned r_tmp;
-
-  Thumb2_Fill(jinfo, 3);
-  r_value = POP(jstack);
-  r_index = POP(jstack);
-  r_array = POP(jstack);
-  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index)|(1<<r_value));
-  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8, 1, 0);
-  chka(jinfo->codebuf, r_tmp, r_index);
-  if (opc == opc_bastore) {
-    add_reg(jinfo->codebuf, r_tmp, r_array, r_index);
-    strb_imm(jinfo->codebuf, r_value, r_tmp, 12, 1, 0);
-  } else if (opc == opc_castore || opc == opc_sastore) {
-    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 1);
-    strh_imm(jinfo->codebuf, r_value, r_tmp, 12, 1, 0);
-  } else {
-    add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 2);
-    str_imm(jinfo->codebuf, r_value, r_tmp, 12, 1, 0);
-  }
-}
-
-void Thumb2_X2astore(Thumb2_Info *jinfo)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi, r_index, r_array;
-  unsigned r_tmp;
-
-  Thumb2_Fill(jinfo, 4);
-  r_lo = POP(jstack);
-  r_hi = POP(jstack);
-  r_index = POP(jstack);
-  r_array = POP(jstack);
-  r_tmp = Thumb2_Tmp(jinfo, (1<<r_array)|(1<<r_index)|(1<<r_lo)|(1<<r_hi));
-  ldr_imm(jinfo->codebuf, r_tmp, r_array, 8, 1, 0);
-  chka(jinfo->codebuf, r_tmp, r_index);
-  add_reg_shift(jinfo->codebuf, r_tmp, r_array, r_index, SHIFT_LSL, 3);
-  strd_imm(jinfo->codebuf, r_lo, r_hi, r_tmp, 16, 1, 0);
-}
-
-void Thumb2_Pop(Thumb2_Info *jinfo, unsigned n)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-
-  while (n > 0 && jstack->depth > 0) {
-    POP(jstack);
-    n--;
-  }
-  if (n > 0) add_imm(jinfo->codebuf, Rstack, Rstack, n * 4);
-}
-
-void Thumb2_Dup(Thumb2_Info *jinfo, unsigned n)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned *stack = jstack->stack;
-  unsigned depth;
-  unsigned i;
-
-  Thumb2_Fill(jinfo, n+1);
-  depth = jstack->depth;
-  for (i = 0; i <= n; i++)
-    stack[depth-i] = stack[depth-i-1];
-  stack[depth-n-1] = stack[depth];
-  jstack->depth = depth + 1;
-}
-
-void Thumb2_Dup2(Thumb2_Info *jinfo, unsigned n)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned *stack = jstack->stack;
-  unsigned depth;
-  unsigned i;
-
-  Thumb2_Fill(jinfo, n+2);
-  depth = jstack->depth;
-  for (i = 0; i <= n+1; i++)
-    stack[depth-i+1] = stack[depth-i-1];
-  stack[depth-n-1] = stack[depth+1];
-  stack[depth-n-2] = stack[depth];
-  jstack->depth = depth + 2;
-}
-
-void Thumb2_Swap(Thumb2_Info *jinfo)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-
-  Thumb2_Fill(jinfo, 2);
-  SWAP(jstack);
-}
-
-void Thumb2_iOp(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lho, r_rho, r;
-
-  Thumb2_Fill(jinfo, 2);
-  r_rho = POP(jstack);
-  r_lho = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  r = JSTACK_REG(jstack);
-  PUSH(jstack, r);
-  switch (opc) {
-  case opc_ishl:
-  case opc_ishr:
-  case opc_iushr:
-    {
-      unsigned tmp_reg = Thumb2_Tmp(jinfo, 1 << r_lho | 1 << r_rho | 1 << r);
-      and_imm(jinfo->codebuf, tmp_reg, r_rho, 31);
-      r_rho = tmp_reg;
-      break;
-    }
-  }
-  dop_reg(jinfo->codebuf, dOps[opc-opc_iadd], r, r_lho, r_rho, 0, 0);
-}
-
-void Thumb2_iNeg(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_src, r;
-
-  Thumb2_Fill(jinfo, 1);
-  r_src = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  r = JSTACK_REG(jstack);
-  PUSH(jstack, r);
-  rsb_imm(jinfo->codebuf, r, r_src, 0);
-}
-
-void Thumb2_lNeg(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi, r_res_lo, r_res_hi;
-  unsigned r_tmp;
-
-  Thumb2_Fill(jinfo, 2);
-  r_lo = POP(jstack);
-  r_hi = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
-  Thumb2_Spill(jinfo, 1, (1<<r_hi));
-  r_res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~(1<<r_hi)));
-  JASSERT(r_res_lo != r_res_hi, "oops");
-  JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
-  rsb_imm(jinfo->codebuf, r_res_lo, r_lo, 0);
-  r_tmp = Thumb2_Tmp(jinfo, (1<<r_hi)|(1<<r_res_lo));
-  mov_imm(jinfo->codebuf, r_tmp, 0);
-  dop_reg(jinfo->codebuf, DP_SBC, r_res_hi, r_tmp, r_hi, SHIFT_LSL, 0);
-}
-
-void Thumb2_fNeg(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r, r_result;
-
-  Thumb2_Fill(jinfo, 1);
-  r = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  r_result = PUSH(jstack, JSTACK_REG(jstack));
-  eor_imm(jinfo->codebuf, r_result, r, 0x80000000);
-}
-
-// arm_op is either DP_EOR (for dnegate) or DP_BIC (for dabs)
-static void Thumb2_dUnaryOp(Thumb2_Info *jinfo, u32 arm_op)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi, r_res_lo, r_res_hi;
-
-  Thumb2_Fill(jinfo, 2);
-  r_lo = POP(jstack);
-  r_hi = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
-  Thumb2_Spill(jinfo, 1, (1<<r_hi));
-  r_res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~(1<<r_hi)));
-  JASSERT(r_res_lo != r_res_hi, "oops");
-  JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
-  mov_reg(jinfo->codebuf, r_res_lo, r_lo);
-  dop_imm(jinfo->codebuf, arm_op, r_res_hi, r_hi, 0x80000000);
-}
-
-void Thumb2_dNeg(Thumb2_Info *jinfo)
-{
-  Thumb2_dUnaryOp(jinfo, DP_EOR);
-}
-
-void Thumb2_dAbs(Thumb2_Info *jinfo)
-{
-  Thumb2_dUnaryOp(jinfo, DP_BIC);
-}
-
-void Thumb2_lOp(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned res_lo, res_hi;
-  unsigned lho_lo, lho_hi;
-  unsigned rho_lo, rho_hi;
-
-  Thumb2_Fill(jinfo, 4);
-  rho_lo = POP(jstack);
-  rho_hi = POP(jstack);
-  lho_lo = POP(jstack);
-  lho_hi = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  res_hi = PUSH(jstack, JSTACK_REG(jstack));
-  Thumb2_Spill(jinfo, 1, (1<<lho_hi)|(1<<rho_hi));
-  res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_hi)|(1<<rho_hi))));
-  JASSERT(res_lo != rho_hi && res_lo != lho_hi, "res_lo != rho_hi && res_lo != lho_hi");
-  dop_reg(jinfo->codebuf, dOps[opc-opc_ladd], res_lo, lho_lo, rho_lo, SHIFT_LSL, 0);
-  dop_reg(jinfo->codebuf, dOps[opc-opc_ladd+1], res_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
-}
-
-void Thumb2_lmul(Thumb2_Info *jinfo)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned res_lo, res_hi;
-  unsigned lho_lo, lho_hi;
-  unsigned rho_lo, rho_hi;
-  unsigned r_tmp_lo, r_tmp_hi;
-  unsigned op_mask;
-
-  Thumb2_Fill(jinfo, 4);
-  rho_lo = POP(jstack);
-  rho_hi = POP(jstack);
-  lho_lo = POP(jstack);
-  lho_hi = POP(jstack);
-  op_mask = (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi);
-  Thumb2_Spill(jinfo, 2, 0);
-  res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~op_mask));
-  res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~op_mask));
-  r_tmp_lo = res_lo;
-  r_tmp_hi = res_hi;
-  if (op_mask & (1<<r_tmp_lo)) r_tmp_lo = Thumb2_Tmp(jinfo, op_mask);
-  if (op_mask & (1<<r_tmp_hi)) r_tmp_hi = Thumb2_Tmp(jinfo, op_mask|(1<<r_tmp_lo));
-  umull(jinfo->codebuf, r_tmp_lo, r_tmp_hi, rho_lo, lho_lo);
-  mla(jinfo->codebuf, r_tmp_hi, rho_lo, lho_hi, r_tmp_hi);
-  mla(jinfo->codebuf, res_hi, rho_hi, lho_lo, r_tmp_hi);
-  mov_reg(jinfo->codebuf, res_lo, r_tmp_lo);
-}
-
-void Thumb2_fOp(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned rho, lho, res;
-
-  Thumb2_Fill(jinfo, 2);
-  rho = POP(jstack);
-  lho = POP(jstack);
-  Thumb2_Spill(jinfo, 1, 0);
-  res = PUSH(jstack, JSTACK_REG(jstack));
-  vmov_reg_s_toVFP(jinfo->codebuf, VFP_S0, lho);
-  vmov_reg_s_toVFP(jinfo->codebuf, VFP_S1, rho);
-  vop_reg_s(jinfo->codebuf, dOps[opc-opc_iadd], VFP_S0, VFP_S0, VFP_S1);
-  vmov_reg_s_toARM(jinfo->codebuf, res, VFP_S0);
-}
-
-void Thumb2_dOp(Thumb2_Info *jinfo, u32 opc)
-{
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned rho_lo, rho_hi, lho_lo, lho_hi, res_lo, res_hi;
-
-  Thumb2_Fill(jinfo, 4);
-  rho_lo = POP(jstack);
-  rho_hi = POP(jstack);
-  lho_lo = POP(jstack);
-  lho_hi = POP(jstack);
-  Thumb2_Spill(jinfo, 2, 0);
-  res_hi = PUSH(jstack, JSTACK_REG(jstack));
-  res_lo = PUSH(jstack, JSTACK_REG(jstack));
-  vmov_reg_d_toVFP(jinfo->codebuf, VFP_D0, lho_lo, lho_hi);
-  vmov_reg_d_toVFP(jinfo->codebuf, VFP_D1, rho_lo, rho_hi);
-  vop_reg_d(jinfo->codebuf, dOps[opc-opc_iadd], VFP_D0, VFP_D0, VFP_D1);
-  vmov_reg_d_toARM(jinfo->codebuf, res_lo, res_hi, VFP_D0);
-}
-
-void Thumb2_Handler(Thumb2_Info *jinfo, unsigned handler, unsigned opcode, unsigned bci)
-{
-  mov_imm(jinfo->codebuf, ARM_R0, opcode);
-  mov_imm(jinfo->codebuf, ARM_R1, bci);
-  mov_imm(jinfo->codebuf, ARM_IP, 0);
-  str_imm(jinfo->codebuf, ARM_IP, ARM_IP, 0, 1, 0);
-}
-
-void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start);
-
-// called from the SEGV handling code to see if a polling page read
-// is from a legitimate safepoint address
-int Thumb2_Install_Safepoint_PC(ucontext_t *uc, int magicByteOffset)
-{
-  mcontext_t *mc = &uc->uc_mcontext;
-  unsigned long arm_pc = mc->arm_pc;
-  // ensure the faulting instruction lies in JITted code
-  if (arm_pc < (unsigned long)(thumb2_codebuf + 1)) {
-    return false;
-  }
-  if (arm_pc >= (unsigned long)thumb2_codebuf->sp) {
-    return false;
-  }
-  // skip to the MAGIC word and check it is valid
-  arm_pc +=magicByteOffset;
-  if (*((short*)arm_pc) != (short)THUMB2_POLLING_PAGE_MAGIC) {
-    return false;
-  }
-
-  // skip the magic word 
-  arm_pc += 2;
-  mc->arm_pc = arm_pc;
-
-  return true;
-}
-
-// Insert code to poll the SafepointSynchronize state and call
-// Helper_SafePoint.
-// -- if offset is negative it identifies a bytecode index which
-// should be jumped to via an unconditional backward branch
-// taken either before or after executing the safepoint check
-// -- if offset is zero or positive then a return or conditional
-// branch, respectively, needs to be compiled so control should
-// flow to end of the safepoint check whether or not it is executed
-
-void Thumb2_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci, int offset)
-{
-  // normal case: read the polling page and branch to skip
-  // the safepoint test
-  // abnormal case: read the polling page, trap to handler
-  // which resets return address into the safepoint check code
-  //
-  // with a negative offset the generated code will look like
-  //    movw r_tmp, #polling_page
-  //    movt r_tmp, #polling_page
-  //    ldr r_tmp, [r_tmp, #K] ; K == 2 * byte offset to the magic word
-  //    b.n #branchtarget
-  //    #POLLING_PAGE_MAGIC ; magic data word
-  //    <
-  //     safepoint check  code
-  //    >
-  //    b.n #branchtarget
-  //
-  // i.e. the generated code includes the branch backwards twice
-  // and relies on a fault at the ldr to skip into the safepoint code
-  //
-  // with a zero or positive offset the caller will plant the return
-  // (zero) or conditional branch (positive) code after the check so
-  // the normal path skips round the safepoint check code and the
-  // abnormal path just drops through. the generated code will look
-  // like
-  //
-  //    movw r_tmp, #polling_page
-  //    movt r_tmp, #polling_page
-  //    ldr r_tmp, [r_tmp, #0]
-  //    b.n L1
-  //    POLLING_PAGE_MAGIC ; data
-  //    <
-  //     safepoint check  code
-  //    >
-  // L1:
-  //    <caller plants branch/return here>
-  //
-  //  n.b. for a return there is no need save or restore locals
-
-  bool is_return = offset == 0; // This is some kind of return bytecode
-
-  int r_tmp = Thumb2_Tmp(jinfo, 0);
-  unsigned dest;
-  if (offset < 0) {
-    // the index of the backward branch target in the code buffer
-    dest = jinfo->bc_stackinfo[bci+offset] & ~BC_FLAGS_MASK;
-  } else {
-    dest = 0;
-  }
-  mov_imm(jinfo->codebuf, r_tmp, (u32)os::get_polling_page());
-  // this encodes the offset from the read instruction to the magic
-  // word into the fault address, assuming it is 4 bytes. however, if
-  // we need to plant a wide backwards branch we may need to rewrite
-  // this instruction with offset 6. so stash the instruction location
-  // here just in case. n.b. the offset is doubled to ensure the fault
-  // address in aligned -- aligned reads always use a single 16-bit
-  // instruction whereas non-aligned reads require 2 x 16 bit words
-  unsigned read_loc = out_loc(jinfo->codebuf);
-  ldr_imm(jinfo->codebuf, r_tmp, r_tmp, 8, 1, 0);
-  if (offset < 0) {
-    branch_uncond(jinfo->codebuf, dest);
-    unsigned magic_loc = out_loc(jinfo->codebuf);
-    if (magic_loc - read_loc != 4) {
-      JASSERT(magic_loc - read_loc == 6, "bad safepoint offset to magic word");
-      // must have needed a wide branch so patch the load instruction
-      jinfo->codebuf->idx = read_loc >> 1;
-      ldr_imm(jinfo->codebuf, r_tmp, r_tmp, 12, 1, 0);
-      jinfo->codebuf->idx = magic_loc >> 1;
-    }
-  } else {
-    // leave space for the forward skip branch
-    // location of branch instruction is read_loc + 2
-    forward_16(jinfo->codebuf);
-  }
-  // now write a magic word after the branch so the signal handler can
-  // test that a polling page read is kosher
-  out_16(jinfo->codebuf, THUMB2_POLLING_PAGE_MAGIC);
-
-  {
-    // Flush the stack to memory and save its register state.
-    SAVE_STACK(jinfo->jstack);
-    Thumb2_Flush(jinfo);
-
-    // We don't save or restore locals if we're returning.
-    if (! is_return)
-      Thumb2_save_local_refs(jinfo, stackdepth);
-
-    // now the safepoint polling code itself
-    mov_imm(jinfo->codebuf, ARM_R1, bci+CONSTMETHOD_CODEOFFSET);
-    add_imm(jinfo->codebuf, ARM_R2, ISTATE_REG(jinfo),
-	    ISTATE_OFFSET(jinfo, stackdepth, 0));
-    bl(jinfo->codebuf, handlers[H_SAFEPOINT]);
-
-    if (! is_return)
-      Thumb2_restore_local_refs(jinfo, stackdepth);
-
-    RESTORE_STACK(jinfo->jstack, jinfo->codebuf);
-
-    if (offset < 0) {
-      // needs another unconditional backward branch
-      branch_uncond(jinfo->codebuf, dest);
-    } else {
-      // patch in the forward skip branch
-      branch_narrow_patch(jinfo->codebuf, read_loc + 2);
-    }
-  }
-}
-
-// If this is a backward branch, compile a safepoint check
-//
-// jinfo      - per-method JIT compilation state
-// stackdepth - current Java expression stack depth at this bytecode
-// bci        - bytecode index of the branch instruction
-//
-// A poll is only emitted when the branch target has already been
-// compiled (BC_COMPILED set), which implies the branch goes backwards.
-void Thumb2_Cond_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci) {
-  // Signed 16-bit branch displacement follows the opcode byte.
-  int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
-  unsigned dest_taken = bci + offset;
-
-  if (jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
-    // pass offset as positive so the safepoint code plant a forward
-    // skip over the test rather than doing an unconditional backwards
-    // branch. that allows the condition test to be planted by
-    // whatever followed this call
-    // (offset is negative here -- the target is behind us -- so
-    // -offset is the positive value Thumb2_Safepoint expects.)
-    Thumb2_Safepoint(jinfo, stackdepth, bci, -offset);
-  }
-}
-
-// Compile a conditional branch bytecode at bci with ARM condition
-// code 'cond'.
-//
-// Returns the bci of the fall-through (not-taken) path when the caller
-// should continue compiling from there, or -1 when this routine has
-// itself recursively compiled everything reachable from the
-// fall-through path (forward-branch case) and patched the branch.
-int Thumb2_Branch(Thumb2_Info *jinfo, unsigned bci, unsigned cond)
-{
-    int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
-    unsigned dest_taken = bci + offset;
-    unsigned dest_not_taken = bci + 3;
-    unsigned loc;
-
-    if (jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
-      // Target already generated: emit a direct backward branch to its
-      // code offset (flags masked off) and let the caller continue.
-      branch(jinfo->codebuf, cond, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
-      return dest_not_taken;
-    }
-    // Forward branch: reserve a 32-bit slot, compile the not-taken
-    // path first, then patch the reserved slot with the real target.
-    loc = forward_32(jinfo->codebuf);
-    Thumb2_codegen(jinfo, dest_not_taken);
-    JASSERT(jinfo->bc_stackinfo[dest_taken] & BC_COMPILED, "dest in branch not compiled!!!");
-    branch_patch(jinfo->codebuf, cond, loc, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
-    return -1;
-}
-
-// Compile an unconditional goto bytecode at bci.
-//
-// offset     - signed branch displacement (bytecodes)
-// len        - length of this goto bytecode (3 for goto, 5 for goto_w)
-// stackdepth - current stack depth, or -1 to suppress the safepoint
-//              check on a backward branch
-//
-// Returns the bci following the goto, or -1 when the target had not
-// yet been compiled and this routine compiled the rest itself.
-int Thumb2_Goto(Thumb2_Info *jinfo, unsigned bci, int offset, int len, int stackdepth = -1)
-{
-    unsigned dest_taken = bci + offset;
-    unsigned dest_not_taken = bci + len;
-    unsigned loc;
-
-    if (stackdepth >= 0
-	&& jinfo->bc_stackinfo[dest_taken] & BC_COMPILED) {
-      // n.b. the backwards branch will be planted by the safepoint routine
-      Thumb2_Safepoint(jinfo, stackdepth, bci, offset);
-      return dest_not_taken;
-    }
-    // Forward (or safepoint-suppressed) case: reserve a branch slot,
-    // compile from the instruction after the goto, then patch the slot.
-    loc = forward_32(jinfo->codebuf);
-    Thumb2_codegen(jinfo, dest_not_taken);
-    JASSERT(jinfo->bc_stackinfo[dest_taken] & BC_COMPILED, "dest in goto not compiled!!!");
-    branch_uncond_patch(jinfo->codebuf, loc, jinfo->bc_stackinfo[dest_taken] & ~BC_FLAGS_MASK);
-    return -1;
-}
-
-// Write back register-cached locals that are both references and
-// modified, so their memory slots are up to date (presumably so a GC
-// triggered from the upcoming runtime call sees them -- TODO confirm
-// against the safepoint handler).  Must be called with an empty
-// expression stack.
-void Thumb2_save_local_refs(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  for (i = 0; i < nlocals; i++) {
-    // r is non-zero only for locals that live in an ARM register.
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if ((locals_info[i] & (1 << LOCAL_REF)) && (locals_info[i] & (1 << LOCAL_MODIFIED))) {
-	store_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-// Reload every register-cached reference local from its memory slot.
-// Counterpart of Thumb2_save_local_refs: after a runtime call a GC may
-// have moved the referenced objects, so the cached register copies are
-// refreshed.  Requires an empty expression stack.
-void Thumb2_restore_local_refs(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      // Note: reloads refs regardless of LOCAL_MODIFIED -- the memory
-      // copy is authoritative after the call.
-      if (locals_info[i] & (1<<LOCAL_REF)) {
-	load_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-// Write back every register-cached local that has been modified
-// (references and primitives alike), e.g. before exiting to the
-// interpreter or entering a slow path that reads the locals array.
-// Requires an empty expression stack.
-void Thumb2_save_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if (locals_info[i] & (1 << LOCAL_MODIFIED)) {
-	store_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-// Reload every register-cached local from memory, unconditionally.
-// Counterpart of Thumb2_save_all_locals.  Requires an empty
-// expression stack.
-void Thumb2_restore_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-	load_local(jinfo, r, i, stackdepth);
-    }
-  }
-}
-
-// Emit an exit from compiled code into the given runtime handler
-// (e.g. H_EXIT_TO_INTERPRETER, H_DEADCODE): flush the cached
-// expression stack, spill all modified locals, pass the current
-// bytecode position in R0 and call the handler stub.
-void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth)
-{
-    Thumb2_Flush(jinfo);
-    Thumb2_save_all_locals(jinfo, stackdepth);
-    // R0 = bci biased by the constMethod code offset, as the handlers expect.
-    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-    bl(jinfo->codebuf, handlers[handler]);
-}
-
-// Emit code for a *return bytecode (return/ireturn/lreturn/freturn/
-// dreturn/areturn).  Plants a safepoint check, unlocks the monitor for
-// synchronized methods, stores the result over the locals area, pops
-// the zero frame and returns to the caller.  Previously emitted return
-// sequences are reused via jinfo->compiled_return /
-// compiled_word_return[] to save code space.
-void Thumb2_Return(Thumb2_Info *jinfo, unsigned opcode, int bci, int stackdepth)
-{
-  Thumb2_Safepoint(jinfo, stackdepth, bci, 0);
-
-  Reg r_lo, r;
-  Thumb2_Stack *jstack = jinfo->jstack;
-
-  if (jinfo->method->has_monitor_bytecodes()) {
-    // Methods with explicit monitorenter/exit bail out to the
-    // interpreter rather than being handled here.
-    Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
-  }
-
-  if (jinfo->method->is_synchronized()) {
-    unsigned loc_success1, loc_success2, loc_failed, loc_retry, loc_exception;
-    unsigned loc_illegal_monitor_state;
-    Thumb2_Flush(jinfo);
-//    Thumb2_save_local_refs(jinfo);
-    // Free the monitor
-    //
-    // 		add	r1, #<stackdepth>-8
-    // 		ldr	r2, [r1, #4]
-    //		cbz	r2, throw_illegal_monitor_state
-    //		ldr	r0, [r1, #0]
-    //		mov	r3, #0
-    //		str	r3, [r1, #4]
-    //		cbz	r0, success
-    //	retry:
-    //		ldrex	r3, [r2, #0]
-    //		cmp	r1, r3
-    //		bne	failed
-    //		strex	r3, r0, [r2, #0]
-    //		cbz	r3, success
-    //		b	retry
-    //	failed:
-    //		str	r2, [r1, #4]
-    //		...
-    //  success:
-    //
-    // JAZ_V1 == tmp2
-    // JAZ_V2 == tmp1
-    // R1 -> the interpreter monitor slot just below the frame.
-    add_imm(jinfo->codebuf, ARM_R1, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) - frame::interpreter_frame_monitor_size()*wordSize);
-
-    ldr_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4, 1, 0);
-    loc_illegal_monitor_state = forward_16(jinfo->codebuf);
-    ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0, 1, 0);
-    mov_imm(jinfo->codebuf, ARM_R3, 0);
-    str_imm(jinfo->codebuf, ARM_R3, ARM_R1, 4, 1, 0);
-    loc_success1 = forward_16(jinfo->codebuf);
-    loc_retry = out_loc(jinfo->codebuf);
-    // CAS loop: restore the displaced header only if the object still
-    // points at our lock record.
-    ldrex_imm(jinfo->codebuf, ARM_R3, ARM_R2, 0);
-    cmp_reg(jinfo->codebuf, ARM_R1, ARM_R3);
-    loc_failed = forward_16(jinfo->codebuf);
-    strex_imm(jinfo->codebuf, ARM_R3, ARM_R0, ARM_R2, 0);
-    loc_success2 = forward_16(jinfo->codebuf);
-    branch_uncond(jinfo->codebuf, loc_retry);
-    bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
-    cbz_patch(jinfo->codebuf, ARM_R2, loc_illegal_monitor_state);
-    // Slow path: put the object back in the lock record and call the
-    // runtime to exit the monitor; then check for a pending exception.
-    str_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4, 1, 0);
-    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
-    bl(jinfo->codebuf, handlers[H_SYNCHRONIZED_EXIT]);
-    loc_exception = forward_16(jinfo->codebuf);
-    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-    cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
-    cbz_patch(jinfo->codebuf, ARM_R0, loc_success1);
-    cbz_patch(jinfo->codebuf, ARM_R3, loc_success2);
-  }
-
-  if (opcode == opc_return) {
-    if (jinfo->compiled_return) {
-      // Reuse the previously emitted void-return epilogue.
-      unsigned ret_idx = jinfo->compiled_return;
-
-      branch_uncond(jinfo->codebuf, ret_idx);
-      return;
-    }
-    jinfo->compiled_return = jinfo->codebuf->idx * 2;
-  } else {
-    if (opcode == opc_lreturn || opcode == opc_dreturn) {
-      // 64-bit result: pop low and high words.
-      Thumb2_Fill(jinfo, 2);
-      r_lo = POP(jstack);
-      r = POP(jstack);
-    } else {
-      Thumb2_Fill(jinfo, 1);
-      r = POP(jstack);
-      // One shared epilogue per result register.
-      if (jinfo->compiled_word_return[r]) {
-        unsigned ret_idx = jinfo->compiled_word_return[r];
-
-        branch_uncond(jinfo->codebuf, ret_idx);
-        return;
-      }
-      jinfo->compiled_word_return[r] = jinfo->codebuf->idx * 2;
-    }
-  }
-
-  // Common epilogue: clear last_Java_sp/fp, pop the zero frame.
-  mov_imm(jinfo->codebuf, ARM_LR, 0);
-  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 0, 1, 0);
-
-  if (opcode == opc_return) {
-    // No result: just discard the locals area.
-    add_imm(jinfo->codebuf, Rstack, Rstack, jinfo->method->max_locals() * sizeof(int) + 4);
-  } else {
-    // Store the result where the caller expects it, on top of the
-    // (now dead) locals area.
-    if (opcode == opc_lreturn || opcode == opc_dreturn) {
-      str_imm(jinfo->codebuf, r, Rstack, jinfo->method->max_locals() * sizeof(int), 1, 0);
-      str_imm(jinfo->codebuf, r_lo, Rstack, jinfo->method->max_locals() * sizeof(int)-4, 1, 1);
-    } else {
-      str_imm(jinfo->codebuf, r, Rstack, jinfo->method->max_locals() * sizeof(int), 1, 1);
-    }
-  }
-
-  str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-
-  // deoptimized_frames = 0
-  // FIXME: This should be done in the slow entry, but only three
-  // words are allocated there for the instructions.
-  mov_imm(jinfo->codebuf, ARM_R0, 0);
-
-  // Restore caller-saved registers and return (PC popped directly).
-  ldm(jinfo->codebuf, C_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
-}
-
-// Try to compile a trivial accessor method -- the bytecode pattern
-// aload_0; getfield; ireturn/areturn -- as a minimal load/return stub
-// with no frame.  Returns 1 on success, 0 if the field is not yet
-// resolved (caller falls back to full compilation).
-int Thumb2_Accessor(Thumb2_Info *jinfo)
-{
-  jubyte *code_base = jinfo->code_base;
-  constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-  ConstantPoolCacheEntry* cache;
-  int index = GET_NATIVE_U2(code_base+2);
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-
-  JASSERT(code_base[0] == opc_aload_0 || code_base[0] == opc_iaccess_0, "not an aload_0 in accessor");
-  JASSERT(code_base[4] == opc_ireturn || code_base[4] == opc_areturn, "not an ireturn in accessor");
-  cache = cp->entry_at(index);
-  if (!cache->is_resolved((Bytecodes::Code)opc_getfield)) return 0;
-
-  TosState tos_type = cache->flag_state();
-  int field_offset = cache->f2_as_index();
-
-  // Slow entry point - callee save
-  // R0 = method
-  // R2 = thread
-  stm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
-  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + FAST_ENTRY_OFFSET - 6);
-  ldm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_PC), ARM_SP, POP_FD, 1);
-  out_16(jinfo->codebuf, 0);
-
-  // Metadata words between the slow and fast entry points.
-  out_32(jinfo->codebuf, 0);	// pointer to osr table
-  out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
-  out_32(jinfo->codebuf, 0);	// next compiled method
-
-  out_32(jinfo->codebuf, -1);    // regusage
-  out_32(jinfo->codebuf, -1);
-  out_32(jinfo->codebuf, -1);
-
-  out_align(jinfo->codebuf, CODE_ALIGN);
-
-  // fast entry point
-  bc_stackinfo[0] = (bc_stackinfo[0] & BC_FLAGS_MASK) | (jinfo->codebuf->idx * 2) | BC_COMPILED;
-  // Load the receiver from the top of the Java stack and replace it
-  // with the field value, sign/zero-extending by field type.
-  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_JAVA_SP, 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0, 1, 0);
-  if (tos_type == btos)
-    ldrsb_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset, 1, 0);
-  else if (tos_type == ctos)
-    ldrh_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset, 1, 0);
-  else if (tos_type == stos)
-    ldrsh_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset, 1, 0);
-  else
-    ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0, 1, 0);
-
-  if (cache->is_volatile())
-    fullBarrier(jinfo->codebuf);
-
-  // deoptimized_frames = 0
-  mov_imm(jinfo->codebuf, ARM_R0, 0);
-  mov_reg(jinfo->codebuf, ARM_PC, ARM_LR);
-
-  return 1;
-}
-
-// Effective stack depth for a bytecode: the depth recorded in its
-// stackinfo word (flag bits masked off) plus one monitor slot for
-// synchronized methods, whose frame carries an implicit monitor.
-#define STACKDEPTH(jinfo, stackinfo) (((stackinfo) & ~BC_FLAGS_MASK) + \
-	((jinfo)->method->is_synchronized() ? frame::interpreter_frame_monitor_size() : 0))
-
-
-// Emit the method prologue: slow and fast entry points, stack
-// overflow check, zeroing of reference locals, construction of the
-// interpreter-state (istate) frame, monitor acquisition for
-// synchronized methods, and initial loading of register-cached
-// parameters.
-void Thumb2_Enter(Thumb2_Info *jinfo)
-{
-  int parms = jinfo->method->size_of_parameters();
-  int extra_locals = jinfo->method->max_locals() - parms;
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-  unsigned stackdepth = 0;
-
-  // Slow entry point - callee save
-  // R0 = method
-  // R2 = thread
-  stm(jinfo->codebuf, I_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
-  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + FAST_ENTRY_OFFSET - 6);
-  ldm(jinfo->codebuf, I_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
-  out_16(jinfo->codebuf, 0);
-
-  // Metadata words between the slow and fast entry points.
-  out_32(jinfo->codebuf, 0);	// Space for osr_table pointer
-  out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
-  out_32(jinfo->codebuf, 0);	// Pointer to next method
-
-  out_32(jinfo->codebuf, 0);    // regusage
-  out_32(jinfo->codebuf, 0);
-  out_32(jinfo->codebuf, 0);
-
-  out_align(jinfo->codebuf, CODE_ALIGN);
-
-  // Fast entry point == Slow entry + 64 - caller save
-  // R0 = method
-  // R2 = thread
-  stm(jinfo->codebuf, C_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-  {
-    unsigned stacksize;
-
-    // Check for Java stack overflow up front; leaf methods within the
-    // pre-reserved LEAF_STACK_SIZE skip the check.
-    stacksize = (extra_locals + jinfo->method->max_stack()) * sizeof(int);
-    stacksize += FRAME_SIZE + STACK_SPARE;
-    if (!jinfo->is_leaf || stacksize > LEAF_STACK_SIZE) {
-      ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_STACK_BASE, 1, 0);
-      sub_imm(jinfo->codebuf, ARM_R1, Rstack, stacksize + LEAF_STACK_SIZE);
-      cmp_reg(jinfo->codebuf, ARM_R3, ARM_R1);
-      it(jinfo->codebuf, COND_CS, IT_MASK_T);
-      bl(jinfo->codebuf, handlers[H_STACK_OVERFLOW]);
-    }
-  }
-  // R1 = 0 is reused below for zeroing locals and istate fields.
-  mov_imm(jinfo->codebuf, ARM_R1, 0);
-
-  if (extra_locals > 0) {
-    sub_imm(jinfo->codebuf, Rstack, Rstack, extra_locals * 4);
-
-    // Zero only locals that hold references (or whose type is not yet
-    // known), so the GC never sees stale stack garbage.
-    for (i = 0; i < extra_locals; i++) {
-      unsigned linfo = locals_info[parms+i];
-      if (linfo & (1<< LOCAL_REF) || ((linfo >> LOCAL_INT) & 0x1f) == 0)
-	str_imm(jinfo->codebuf, ARM_R1, Rstack, (extra_locals-1 - i) * 4, 1, 0);
-    }
-  }
-
-  // IP = method->constMethod()->constants()
-  ldr_imm(jinfo->codebuf, ARM_IP, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_IP, ARM_IP, METHOD_CONSTANTS, 1, 0);
-
-  add_imm(jinfo->codebuf, Rlocals, Rstack, (jinfo->method->max_locals()-1) * sizeof(int));
-
-  sub_imm(jinfo->codebuf, Rstack, Rstack, FRAME_SIZE);
-
-  // Populate the istate frame.
-  if (jinfo->use_istate) mov_reg(jinfo->codebuf, Ristate, Rstack);
-  store_istate(jinfo, Rstack, ISTATE_SELF_LINK, stackdepth);
-
-  store_istate(jinfo, Rstack, ISTATE_MONITOR_BASE, stackdepth);
-
-  store_istate(jinfo, Rlocals, ISTATE_LOCALS, stackdepth);
-
-  if (jinfo->method->is_synchronized()) {
-    // Reserve a monitor slot and record the object to lock: the
-    // class mirror for static methods, else the receiver (local 0).
-    sub_imm(jinfo->codebuf, Rstack, Rstack, frame::interpreter_frame_monitor_size()*wordSize);
-    stackdepth = frame::interpreter_frame_monitor_size();
-    if (jinfo->method->is_static()) {
-      ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_POOL_HOLDER, 1, 0);
-      ldr_imm(jinfo->codebuf, JAZ_V1, ARM_R3, KLASS_PART+KLASS_JAVA_MIRROR, 1, 0);
-    } else {
-      ldr_imm(jinfo->codebuf, JAZ_V1, Rlocals, 0, 1, 0);
-    }
-    str_imm(jinfo->codebuf, JAZ_V1, Rstack, 4, 1, 0);
-  }
-
-  store_istate(jinfo, ARM_R1, ISTATE_MSG, stackdepth);
-  store_istate(jinfo, ARM_R1, ISTATE_OOP_TEMP, stackdepth);
-
-  sub_imm(jinfo->codebuf, ARM_R3, Rstack, jinfo->method->max_stack() * sizeof(int));
-  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP, 1, 0);
-
-  store_istate(jinfo, Rstack, ISTATE_STACK_BASE, stackdepth);
-
-  sub_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4);
-  store_istate(jinfo, ARM_R3, ISTATE_STACK_LIMIT, stackdepth);
-
-  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  store_istate(jinfo, ARM_R3, ISTATE_NEXT_FRAME, stackdepth);
-
-  mov_imm(jinfo->codebuf, ARM_R3, INTERPRETER_FRAME);
-  store_istate(jinfo, ARM_R3, ISTATE_FRAME_TYPE, stackdepth);
-
-  mov_imm(jinfo->codebuf, ARM_R1, 0);   // set last SP to zero before
-                                        // setting FP
-  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R3, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) + ISTATE_NEXT_FRAME);
-  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-
-  ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_CACHE, 1, 0);
-  store_istate(jinfo, ARM_R3, ISTATE_CONSTANTS, stackdepth);
-
-  store_istate(jinfo, Rthread, ISTATE_THREAD, stackdepth);
-  store_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
-
-  if (jinfo->method->is_synchronized()) {
-    unsigned loc_retry, loc_failed, loc_success, loc_exception;
-
-    // JAZ_V1 == monitor object
-    //
-    // Try to acquire the monitor. Seems very sub-optimal
-    // 		ldr	r3, [JAZ_V1, #0]
-    // 		orr	r3, r3, #1
-    // 		str	r3, [Rstack, #0]
-    // 	retry:
-    // 		ldrex	r0, [JAZ_V1, #0]
-    // 		cmp	r3, r0
-    // 		bne	failed
-    // 		strex	r0, Rstack, [JAZ_V1, #0]
-    // 		cbz	r0, success
-    // 		b	retry
-    // 	failed:
-    // 		<failed - someone else has the monitor - must yield>
-    //  success:
-    // 		<success - acquired the monitor>
-    //
-    ldr_imm(jinfo->codebuf, ARM_R3, JAZ_V1, 0, 1, 0);
-    orr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 1);
-    str_imm(jinfo->codebuf, ARM_R3, Rstack, 0, 1, 0);
-    loc_retry = out_loc(jinfo->codebuf);
-// retry:
-    ldrex_imm(jinfo->codebuf, ARM_R0, JAZ_V1, 0);
-    cmp_reg(jinfo->codebuf, ARM_R3, ARM_R0);
-    loc_failed = forward_16(jinfo->codebuf);
-    strex_imm(jinfo->codebuf, ARM_R0, Rstack, JAZ_V1, 0);
-    loc_success = forward_16(jinfo->codebuf);
-    branch_uncond(jinfo->codebuf, loc_retry);
-    bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
-// failed:
-    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
-    bl(jinfo->codebuf, handlers[H_SYNCHRONIZED_ENTER]);
-    loc_exception = forward_16(jinfo->codebuf);
-    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
-    cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
-    cbz_patch(jinfo->codebuf, ARM_R0, loc_success);
-// success:
-
-  }
-
-  {
-    int nlocals = jinfo->method->max_locals();
-
-    // Initialize register-cached locals: parameters are loaded from
-    // the stack; other reference locals start out null (R1 == 0).
-    for (i = 0; i < nlocals; i++) {
-      Reg r = jinfo->jregs->r_local[i];
-      if (r) {
-	unsigned stackdepth = STACKDEPTH(jinfo, 0);
-        if (i < parms)
-	  load_local(jinfo, r, i, stackdepth);
-        else if (locals_info[i] & (1<<LOCAL_REF))
-          mov_reg(jinfo->codebuf, r, ARM_R1);
-      }
-    }
-  }
-}
-
-// Maps arithmetic/conversion bytecodes (indexed from opc_idiv -- see
-// the OPCODE2HANDLER macro below) to their out-of-line handler stubs.
-// A zero entry means the operation is compiled inline instead.
-unsigned opcode2handler[] = {
-  H_IDIV,
-  H_LDIV,
-  0, 0,			// fdiv, ddiv
-  H_IREM,
-  H_LREM,
-  H_FREM,
-  H_DREM,
-  0, 0, 0, 0,		// ineg, lneg, fneg, dneg
-  0, 0, 0, 0, 0, 0,	// shifts
-  0, 0, 0, 0, 0, 0,	// and, or, xor
-  0,			// iinc
-  0,			// i2l
-  H_I2F,
-  H_I2D,
-  0,			// l2i
-  H_L2F,
-  H_L2D,
-  H_F2I,
-  H_F2L,
-  H_F2D,
-  H_D2I,
-  H_D2L,
-  H_D2F,
-};
-
-// Generate code for a load of a jlong.
-//
-// r_lo/r_hi     - destination register pair
-// base          - base address register
-// field_offset  - byte offset from base
-// is_volatile   - if true, use ldrexd so the 64-bit read is atomic
-//                 (a plain ldrd is not guaranteed single-copy atomic)
-void Thumb2_load_long(Thumb2_Info *jinfo, Reg r_lo, Reg r_hi, Reg base,
-		      int field_offset,
-		      bool is_volatile = false)
-{
-  CodeBuf *codebuf = jinfo->codebuf;
-  if (is_volatile) {
-    Reg r_addr = base;
-    if (field_offset) {
-      // ldrexd takes no immediate offset: materialize the address in a
-      // scratch register first.
-      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
-      add_imm(jinfo->codebuf, r_addr, base, field_offset);
-    }
-    ldrexd(codebuf, r_lo, r_hi, r_addr);
-  } else {
-    ldrd_imm(codebuf, r_lo, r_hi, base, field_offset, 1, 0);
-  }
-}
-
-// Generate code for a store of a jlong.  If the operand is volatile,
-// generate a sequence of the form
-//
-// .Ldst
-// 	ldrexd 	r2, r3, [dst]
-// 	strexd 	r2, r0, r1, [dst]
-// 	cmp 	r2, #0
-// 	bne 	.Ldst
-//
-// i.e. an exclusive-monitor retry loop, which makes the 64-bit write
-// single-copy atomic (the ldrexd only claims the monitor; its result
-// is discarded).
-void Thumb2_store_long(Thumb2_Info *jinfo, Reg r_lo, Reg r_hi, Reg base,
-		      int field_offset,
-		      bool is_volatile = false)
-{
-  CodeBuf *codebuf = jinfo->codebuf;
-  if (is_volatile) {
-    Reg r_addr = base;
-    Reg tmp1 = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base));
-    Reg tmp2 = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base) | (1<<tmp1));
-    if (field_offset) {
-      // ldrexd/strexd take no immediate offset.
-      r_addr = Thumb2_Tmp(jinfo, (1<<r_lo) | (1<<r_hi) | (1<<base) | (1<<tmp1) | (1<<tmp2));
-      add_imm(jinfo->codebuf, r_addr, base, field_offset);
-    }
-    int loc = out_loc(codebuf);
-    ldrexd(codebuf, tmp1, tmp2, r_addr);
-    strexd(codebuf, tmp1, r_lo, r_hi, r_addr);
-    cmp_imm(codebuf, tmp1, 0);
-    branch(codebuf, COND_NE, loc);
-  } else {
-    strd_imm(codebuf, r_lo, r_hi, base, field_offset, 1, 0);
-  }
-}
-
-// Look up the handler stub for a bytecode via the opcode2handler
-// table above (indexed from opc_idiv).
-#define OPCODE2HANDLER(opc) (handlers[opcode2handler[(opc)-opc_idiv]])
-
-// Mangled name of InterpreterRuntime::register_finalizer(JavaThread*,
-// oopDesc*), referenced directly to avoid including the C++ header.
-extern "C" void _ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc(void);
-
-// Push VFP_REG to the java stack.
-// Allocates two cached-stack registers and moves the double-precision
-// VFP register into them (high word pushed first).
-static void vfp_to_jstack(Thumb2_Info *jinfo, int vfp_reg) {
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi;
-  r_hi = PUSH(jstack, JSTACK_REG(jstack));
-  r_lo = PUSH(jstack, JSTACK_REG(jstack));
-  vmov_reg_d_toARM(jinfo->codebuf, r_lo, r_hi, vfp_reg);
-}
-
-// Pop the java stack to VFP_REG .
-// Moves the top two stack words into the double-precision VFP
-// register, then flushes the cached stack (the caller is about to
-// make a call that may clobber the stack registers).
-static void jstack_to_vfp(Thumb2_Info *jinfo, int vfp_reg) {
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned r_lo, r_hi;
-  Thumb2_Fill(jinfo, 2);
-  r_lo = POP(jstack);
-  r_hi = POP(jstack);
-  vmov_reg_d_toVFP(jinfo->codebuf, vfp_reg, r_lo, r_hi);
-  Thumb2_Flush(jinfo);
-}
-
-// Expand a call to a "special" method.  These are usually inlines of
-// java.lang.Math methods.  Return true if the inlining succeeded.
-//
-// The StrictMath cases resolve the native JNI implementation at
-// runtime with dlsym and call it directly; the compareAndSwap cases
-// expand to inline ldrex/strex loops.  Returning false makes the
-// caller emit a normal invoke.
-static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
-				  unsigned stackdepth) {
-  Thumb2_Stack *jstack = jinfo->jstack;
-  CodeBuf *codebuf = jinfo->codebuf;
-
-  const char *entry_name;
-
-  switch (callee->intrinsic_id()) {
-  case vmIntrinsics::_dabs:
-   {
-     Thumb2_dAbs(jinfo);
-     return true;
-    }
-
-#ifdef __ARM_PCS_VFP
-  case vmIntrinsics::_dsin:
-    entry_name = "Java_java_lang_StrictMath_sin";
-    break;
-
-  case vmIntrinsics::_dcos:
-    entry_name = "Java_java_lang_StrictMath_cos";
-    break;
-
-  case vmIntrinsics::_dtan:
-    entry_name = "Java_java_lang_StrictMath_tan";
-    break;
-
-  case vmIntrinsics::_dsqrt:
-    {
-      void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
-      if (! entry_point)
-	return false;
-
-      unsigned r_lo, r_hi, r_res_lo, r_res_hi;
-
-      // Make sure that canonical NaNs are returned, as per the spec.
-      //
-      // Generate:
-      // vsqrt.f64 d0, d1
-      // vcmp.f64 d0, d0
-      // vmrs APSR_nzcv, fpscr
-      // beq.n 0f
-      // vmov.f64 d0, d1
-      // blx Java_java_lang_StrictMath_sqrt
-      // 0:
-      // (vsqrt result compares unequal to itself only for NaN, in
-      // which case fall back to the library routine.)
-      jstack_to_vfp(jinfo, VFP_D1);
-      vop_reg_d(jinfo->codebuf, VP_SQRT, VFP_D0, 0, VFP_D1);
-      vcmp_reg_d(jinfo->codebuf, VFP_D0, VFP_D0, 0);
-      vmrs(jinfo->codebuf, ARM_PC);
-      int loc = forward_16(jinfo->codebuf);
-      vmov_reg_d_VFP_to_VFP(jinfo->codebuf, VFP_D0, VFP_D1);
-      // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
-      // parameter, so it's arguably pointless to pass it here.
-      add_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_JNI_ENVIRONMENT);
-      mov_imm(jinfo->codebuf, ARM_IP, (unsigned)entry_point);
-      blx_reg(jinfo->codebuf, ARM_IP);
-      bcc_patch(jinfo->codebuf, COND_EQ, loc);
-      vfp_to_jstack(jinfo, VFP_D0);
-
-      return true;
-    }
-
-  case vmIntrinsics::_dlog:
-    entry_name = "Java_java_lang_StrictMath_log";
-    break;
-
-  case vmIntrinsics::_dlog10:
-    entry_name = "Java_java_lang_StrictMath_log10";
-    break;
-#endif // __ARM_PCS_VFP
-
-  case vmIntrinsics::_compareAndSwapInt:
-   {
-      Thumb2_Fill(jinfo, 4);
-
-      unsigned update = POP(jstack);
-      unsigned expect = POP(jstack);
-      unsigned offset = POP(jstack);
-      POP(jstack);  // Actually the high part of the offset
-
-      // unsigned object = POP(jstack);
-      // unsigned unsafe = POP(jstack);  // Initially an instance of java.lang.Unsafe
-
-      Thumb2_Flush(jinfo);
-      // Get ourself a result reg that's not one of the inputs
-      unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
-      unsigned result = JSTACK_PREFER(jstack, ~exclude);
-
-      ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
-      add_reg(codebuf, result, offset, ARM_IP); // result now points to word
-      ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0);  // Security check
-
-      // Barriers on both sides give the CAS full-fence semantics.
-      fullBarrier(codebuf);
-
-      int retry = out_loc(codebuf);
-      ldrex_imm(codebuf, ARM_LR, result, 0);
-      cmp_reg(codebuf, ARM_LR, expect);
-      int loc_failed = forward_16(codebuf);
-      strex_imm(codebuf, ARM_IP, update, result, 0);
-      cmp_imm(codebuf, ARM_IP, 0);
-      branch(codebuf, COND_NE, retry);
-      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
-
-      // result = (Z flag set) ? 1 : 0 -- CAS success as a boolean.
-      it(codebuf, COND_NE, IT_MASK_TEE);
-      mov_imm(codebuf, result, 0);
-      mov_imm(codebuf, result, 1);
-      fullBarrier(codebuf);
-
-      PUSH(jstack, result);
-    }
-    return true;
-
-  case vmIntrinsics::_compareAndSwapLong:
-    {
-      Thumb2_Fill(jinfo, 4);
-
-      unsigned update_lo = POP(jstack);
-      unsigned update_hi = POP(jstack);
-      unsigned expect_lo = POP(jstack);
-      unsigned expect_hi = POP(jstack);
-
-      Thumb2_Flush(jinfo);
-      // JAZ_V1..V3 are needed as scratch, so spill cached locals first.
-      Thumb2_save_all_locals(jinfo, stackdepth - 4); // 4 args popped above
-
-      // instance of java.lang.Unsafe:
-      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize, 1, 0);
-      ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0);  // Security check
-
-      // Object:
-      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize, 1, 0);
-      // Offset:
-      ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize, 1, 0);
-      add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
-
-      fullBarrier(codebuf);
-
-      // 64-bit CAS loop using exclusive doubleword accesses.
-      int retry = out_loc(codebuf);
-      ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
-      cmp_reg(codebuf, JAZ_V2, expect_lo);
-      it(jinfo->codebuf, COND_EQ, IT_MASK_T);
-      cmp_reg(codebuf, JAZ_V3, expect_hi);
-
-      int loc_failed = forward_16(codebuf);
-      strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
-      cmp_imm(codebuf, JAZ_V1, 0);
-      branch(codebuf, COND_NE, retry);
-      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
-
-      unsigned result = JSTACK_REG(jinfo->jstack);
-
-      it(codebuf, COND_NE, IT_MASK_TEE);
-      mov_imm(codebuf, result, 0);
-      mov_imm(codebuf, result, 1);
-      fullBarrier(codebuf);
-
-      Thumb2_restore_all_locals(jinfo, stackdepth - 4); // 4 args popped above
-      add_imm(codebuf, Rstack, Rstack, 4 * wordSize);
-      PUSH(jstack, result);
-    }
-    return true;
-
-  default:
-    return false;
-  }
-
-  // Shared path for the StrictMath cases that only set entry_name.
-  void *entry_point = dlsym(NULL, entry_name);
-  if (! entry_point)
-    return false;
-
-  jstack_to_vfp(jinfo, VFP_D0);
-  // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
-  // parameter, so it's arguably pointless to pass it here.
-  add_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_JNI_ENVIRONMENT);
-  mov_imm(jinfo->codebuf, ARM_IP, (unsigned)entry_point);
-  blx_reg(jinfo->codebuf, ARM_IP);
-  vfp_to_jstack(jinfo, VFP_D0);
-
-  return true;
-}
-
-void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
-{
-  JDEBUG_ (
-  Symbol *name = jinfo->method->name();
-  Symbol *sig = jinfo->method->signature();
-  );
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-  CodeBuf *codebuf = jinfo->codebuf;
-  Thumb2_Stack *jstack = jinfo->jstack;
-  unsigned bci;
-  unsigned opcode;
-  unsigned stackinfo;
-  int len;
-  unsigned stackdepth;
-
-  for (bci = start; bci < code_size; ) {
-    opcode = code_base[bci];
-    stackinfo = bc_stackinfo[bci];
-#ifdef T2_PRINT_DISASS
-    unsigned start_idx;
-#endif
-
-    if (stackinfo & BC_BRANCH_TARGET) Thumb2_Flush(jinfo);
-
-    if (!OSPACE && (stackinfo & BC_BACK_TARGET)) {
-      if (out_pos(codebuf) & 0x02) nop_16(codebuf);
-      if (out_pos(codebuf) & 0x04) nop_32(codebuf);
-    }
-
-#ifdef T2_PRINT_DISASS
-    start_idx = jinfo->codebuf->idx;
-    if (start_bci[start_idx] == -1) start_bci[start_idx] = bci;
-#endif
-
-    JASSERT(!(stackinfo & BC_COMPILED), "code already compiled for this bytecode?");
-    stackdepth = STACKDEPTH(jinfo, stackinfo); // Stackdepth here is adjusted for monitors
-    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2) | BC_COMPILED;
-
-    if (opcode > OPC_LAST_JAVA_OP)
-      switch (opcode) {
-      default:
-	if (Bytecodes::is_defined((Bytecodes::Code)opcode))
-	  opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
-	break;
-      case opc_return_register_finalizer:
-      case opc_fast_aldc_w:
-      case opc_fast_aldc:
-	break;
-      }
-
-    len = Bytecodes::length_for((Bytecodes::Code)opcode);
-    if (len <= 0) {
-      Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
-      len = (Bytecodes::special_length_at
-	     (code,
-	      (address)(code_base+bci), (address)(code_base+code_size)));
-    }
-
-    if (IS_DEAD(stackinfo)) {
-      unsigned zlen = 0;
-#ifdef T2_PRINT_DISASS
-      unsigned start_bci = bci;
-#endif
-
-      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
-      do {
-	zlen += len;
-	bci += len;
-	if (bci >= code_size) break;
-	opcode = code_base[bci];
-	stackinfo = bc_stackinfo[bci];
-
-	if (stackinfo & BC_BRANCH_TARGET) break;
-	if (!IS_DEAD(stackinfo)) break;
-
-	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
-
-	if (opcode > OPC_LAST_JAVA_OP) {
-	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
-	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
-	}
-
-	len = Bytecodes::length_for((Bytecodes::Code)opcode);
-	if (len <= 0) {
-	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
-	  len = (Bytecodes::special_length_at
-		 (code,
-		  (address)(code_base+bci), (address)(code_base+code_size)));
-	}
-
-      } while (1);
-#ifdef T2_PRINT_DISASS
-      end_bci[start_idx] = start_bci + zlen;
-#endif
-      continue;
-    }
-
-#if 0
-    if (bci >= 4) {
-      unsigned zlen = 0;
-#ifdef T2_PRINT_DISASS
-      unsigned start_bci = bci;
-#endif
-
-      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
-      do {
-	zlen += len;
-	bci += len;
-	if (bci >= code_size) break;
-	opcode = code_base[bci];
-	stackinfo = bc_stackinfo[bci];
-
-	if (stackinfo & BC_BRANCH_TARGET) break;
-
-	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
-
-	if (opcode > OPC_LAST_JAVA_OP) {
-	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
-	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
-	}
-
-	len = Bytecodes::length_for((Bytecodes::Code)opcode);
-	if (len <= 0) {
-	  Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
-	  len = (Bytecodes::special_length_at
-		 (code,
-		  (address)(code_base+bci), (address)(code_base+code_size)));
-	}
-
-      } while (1);
-#ifdef T2_PRINT_DISASS
-      end_bci[start_idx] = start_bci + zlen;
-#endif
-      continue;
-    }
-#endif
-
-#ifdef T2_PRINT_DISASS
-    end_bci[start_idx] = bci + len;
-#endif
-
-#ifdef THUMB2_JVMTI
-    // emit a start address --> bci map entry before
-    // generating machine code for this bytecode
-
-    void *addr = (void *)(codebuf->codebuf + codebuf->idx);
-    address_bci_map_add(addr, bci);
-#endif //THUMB2_JVMTI
-
-    switch (opcode) {
-      case opc_nop:
-	break;
-      case opc_aconst_null:
-	len += Thumb2_Imm(jinfo, 0, bci+1);
-	break;
-      case opc_iconst_m1:
-      case opc_iconst_0:
-      case opc_iconst_1:
-      case opc_iconst_2:
-      case opc_iconst_3:
-      case opc_iconst_4:
-      case opc_iconst_5:
-	len += Thumb2_Imm(jinfo, opcode - (unsigned)opc_iconst_0, bci+1);
-	break;
-      case opc_lconst_0:
-      case opc_lconst_1:
-	Thumb2_ImmX2(jinfo, opcode - (unsigned)opc_lconst_0, 0);
-	break;
-      case opc_fconst_0:
-      case opc_fconst_1:
-      case opc_fconst_2: {
-	unsigned v = 0;
-	if (opcode == (unsigned)opc_fconst_1) v = 0x3f800000;
-	if (opcode == (unsigned)opc_fconst_2) v = 0x40000000;
-	len += Thumb2_Imm(jinfo, v, bci+1);
-	break;
-      }
-      case opc_dconst_0:
-      case opc_dconst_1: {
-	unsigned v_hi = 0;
-	if (opcode == (unsigned)opc_dconst_1) v_hi = 0x3ff00000;
-	Thumb2_ImmX2(jinfo, 0, v_hi);
-	break;
-      }
-      case opc_bipush:
-	len += Thumb2_Imm(jinfo, GET_JAVA_S1(code_base+bci+1), bci+2);
-	break;
-      case opc_sipush:
-	len += Thumb2_Imm(jinfo, GET_JAVA_S2(code_base+bci+1), bci+3);
-	break;
-      case opc_ldc:
-      case opc_ldc_w:
-      case opc_ldc2_w: {
-	unsigned index = (opcode == (unsigned)opc_ldc) ?
-				code_base[bci+1] : GET_JAVA_U2(code_base+bci+1);
-	constantPoolOop constants = jinfo->method->constants();
-	unsigned v;
-
-	switch (v = constants->tag_at(index).value()) {
-	  case JVM_CONSTANT_Integer:
-	  case JVM_CONSTANT_Float:
-	    v = (unsigned)constants->int_at(index);
-	    len += Thumb2_Imm(jinfo, v, bci+len);
-	    break;
-	  case JVM_CONSTANT_Long:
-	  case JVM_CONSTANT_Double: {
-	    unsigned long long v;
-	    v = constants->long_at(index);
-	    Thumb2_ImmX2(jinfo, v & 0xffffffff, v >> 32);
-	    break;
-	  }
-	  case JVM_CONSTANT_Class:
-	  case JVM_CONSTANT_String: {
-	    Reg r;
-	    Thumb2_Spill(jinfo, 1, 0);
-	    r = JSTACK_REG(jstack);
-	    PUSH(jstack, r);
-	    load_istate(jinfo, r, ISTATE_METHOD, stackdepth+1);
-	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTMETHOD, 1, 0);
-	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTANTS, 1, 0);
-	    ldr_imm(jinfo->codebuf, r, r, CONSTANTPOOL_BASE + (index << 2), 1, 0);
-	    if (v == JVM_CONSTANT_Class)
-	      ldr_imm(jinfo->codebuf, r, r, KLASS_PART+KLASS_JAVA_MIRROR, 1, 0);
-	    break;
-	  }
-	  default:
-	    unsigned loc;
-
-	    JASSERT(opcode != opc_ldc2_w, "ldc2_w unresolved?");
-	    Thumb2_Flush(jinfo);
-	    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  Thumb2_save_local_refs(jinfo, stackdepth);
-//	    mov_imm(jinfo->codebuf, ARM_R1, opcode != opc_ldc);
-	    bl(jinfo->codebuf, handlers[opcode == opc_ldc ? H_LDC : H_LDC_W]);
-	  Thumb2_restore_local_refs(jinfo, stackdepth);
-	    ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT, 1, 0);
-	    mov_imm(jinfo->codebuf, ARM_R2, 0);
-	    str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT, 1, 0);
-	    loc = forward_16(jinfo->codebuf);
-	    bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	    cbnz_patch(jinfo->codebuf, ARM_R0, loc);
-	    PUSH(jstack, ARM_R0);
-	    break;
-	}
-	break;
-      }
-
-      case opc_iload:
-      case opc_fload:
-      case opc_aload:
-	Thumb2_Load(jinfo, code_base[bci+1], stackdepth);
-	break;
-      case opc_lload:
-      case opc_dload:
-	Thumb2_LoadX2(jinfo, code_base[bci+1], stackdepth);
-	break;
-      case opc_iload_0:
-      case opc_iload_1:
-      case opc_iload_2:
-      case opc_iload_3:
-      case opc_fload_0:
-      case opc_fload_1:
-      case opc_fload_2:
-      case opc_fload_3:
-      case opc_aload_0:
-      case opc_aload_1:
-      case opc_aload_2:
-      case opc_aload_3:
-	Thumb2_Load(jinfo, (opcode - opc_iload_0) & 3, stackdepth);
-	break;
-      case opc_lload_0:
-      case opc_lload_1:
-      case opc_lload_2:
-      case opc_lload_3:
-      case opc_dload_0:
-      case opc_dload_1:
-      case opc_dload_2:
-      case opc_dload_3:
-	Thumb2_LoadX2(jinfo, (opcode - opc_iload_0) & 3, stackdepth);
-	break;
-      case opc_iaload:
-      case opc_faload:
-      case opc_aaload:
-      case opc_baload:
-      case opc_caload:
-      case opc_saload:
-	Thumb2_Xaload(jinfo, opcode);
-	break;
-      case opc_laload:
-      case opc_daload:
-	Thumb2_X2aload(jinfo);
-	break;
-      case opc_istore:
-      case opc_fstore:
-      case opc_astore:
-	Thumb2_Store(jinfo, code_base[bci+1], stackdepth);
-	break;
-      case opc_lstore:
-      case opc_dstore:
-	Thumb2_StoreX2(jinfo, code_base[bci+1], stackdepth);
-	break;
-      case opc_istore_0:
-      case opc_istore_1:
-      case opc_istore_2:
-      case opc_istore_3:
-      case opc_fstore_0:
-      case opc_fstore_1:
-      case opc_fstore_2:
-      case opc_fstore_3:
-      case opc_astore_0:
-      case opc_astore_1:
-      case opc_astore_2:
-      case opc_astore_3:
-	Thumb2_Store(jinfo, (opcode - opc_istore_0) & 3, stackdepth);
-	break;
-      case opc_lstore_0:
-      case opc_lstore_1:
-      case opc_lstore_2:
-      case opc_lstore_3:
-      case opc_dstore_0:
-      case opc_dstore_1:
-      case opc_dstore_2:
-      case opc_dstore_3:
-	Thumb2_StoreX2(jinfo, (opcode - opc_istore_0) & 3, stackdepth);
-	break;
-      case opc_iastore:
-      case opc_fastore:
-      case opc_bastore:
-      case opc_castore:
-      case opc_sastore:
-	Thumb2_Xastore(jinfo, opcode);
-	break;
-      case opc_lastore:
-      case opc_dastore:
-	Thumb2_X2astore(jinfo);
-	break;
-
-      case opc_pop:
-      case opc_pop2:
-	Thumb2_Pop(jinfo, opcode - opc_pop + 1);
-	break;
-
-      case opc_dup:
-      case opc_dup_x1:
-      case opc_dup_x2:
-	Thumb2_Dup(jinfo, opcode - opc_dup);
-	break;
-
-      case opc_dup2:
-      case opc_dup2_x1:
-      case opc_dup2_x2:
-	Thumb2_Dup2(jinfo, opcode - opc_dup2);
-	break;
-
-      case opc_swap:
-	Thumb2_Swap(jinfo);
-	break;
-
-      case opc_iadd:
-      case opc_isub:
-      case opc_imul:
-      case opc_ishl:
-      case opc_ishr:
-      case opc_iushr:
-      case opc_iand:
-      case opc_ior:
-      case opc_ixor:
-	Thumb2_iOp(jinfo, opcode);
-	break;
-
-      case opc_ladd:
-      case opc_lsub:
-      case opc_land:
-      case opc_lor:
-      case opc_lxor:
-	Thumb2_lOp(jinfo, opcode);
-	break;
-
-      case opc_lshl: {
-	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
-	unsigned loc1, loc2;
-
-	Thumb2_Fill(jinfo, 3);
-	shift = POP(jstack);
-	lho_lo = POP(jstack);
-	lho_hi = POP(jstack);
-	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
-	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
-	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
-	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
-	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
-	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
-	tst_imm(jinfo->codebuf, shift, 32);
-	loc1 = forward_16(jinfo->codebuf);
-	mov_imm(jinfo->codebuf, res_lo, 0);
-	dop_reg(jinfo->codebuf, DP_LSL, res_hi, lho_lo, ARM_IP, SHIFT_LSL, 0);
-	loc2 = forward_16(jinfo->codebuf);
-	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
-	dop_reg(jinfo->codebuf, DP_LSL, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
-	dop_reg(jinfo->codebuf, DP_LSL, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
-	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
-	dop_reg(jinfo->codebuf, DP_LSR, ARM_IP, lho_lo, ARM_IP, SHIFT_LSL, 0);
-	dop_reg(jinfo->codebuf, DP_ORR, res_hi, res_hi, ARM_IP, SHIFT_LSL, 0);
-	branch_narrow_patch(jinfo->codebuf, loc2);
-	break;
-      }
-
-      case opc_lushr: {
-	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
-	unsigned loc1, loc2;
-
-	Thumb2_Fill(jinfo, 3);
-	shift = POP(jstack);
-	lho_lo = POP(jstack);
-	lho_hi = POP(jstack);
-	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
-	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
-	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
-	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
-	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
-	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
-	tst_imm(jinfo->codebuf, shift, 32);
-	loc1 = forward_16(jinfo->codebuf);
-	mov_imm(jinfo->codebuf, res_hi, 0);
-	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_hi, ARM_IP, SHIFT_LSL, 0);
-	loc2 = forward_16(jinfo->codebuf);
-	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
-	dop_reg(jinfo->codebuf, DP_LSR, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
-	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
-	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
-	dop_reg(jinfo->codebuf, DP_LSL, ARM_IP, lho_hi, ARM_IP, SHIFT_LSL, 0);
-	dop_reg(jinfo->codebuf, DP_ORR, res_lo, res_lo, ARM_IP, SHIFT_LSL, 0);
-	branch_narrow_patch(jinfo->codebuf, loc2);
-	break;
-      }
-
-      case opc_lshr: {
-	Reg lho_lo, lho_hi, res_lo, res_hi, shift;
-	unsigned loc1, loc2;
-
-	Thumb2_Fill(jinfo, 3);
-	shift = POP(jstack);
-	lho_lo = POP(jstack);
-	lho_hi = POP(jstack);
-	Thumb2_Spill(jinfo, 2, (1<<lho_lo)|(1<<lho_hi));
-	res_hi = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
-	res_lo = PUSH(jstack, JSTACK_PREFER(jstack, ~((1<<lho_lo)|(1<<lho_hi))));
-	JASSERT(res_lo != lho_lo && res_lo != lho_hi, "Spill failed");
-	JASSERT(res_hi != lho_lo && res_hi != lho_hi, "Spill failed");
-	and_imm(jinfo->codebuf, ARM_IP, shift, 31);
-	tst_imm(jinfo->codebuf, shift, 32);
-	loc1 = forward_16(jinfo->codebuf);
-	asr_imm(jinfo->codebuf, res_hi, lho_hi, 31);
-	dop_reg(jinfo->codebuf, DP_ASR, res_lo, lho_hi, ARM_IP, SHIFT_LSL, 0);
-	loc2 = forward_16(jinfo->codebuf);
-	bcc_patch(jinfo->codebuf, COND_EQ, loc1);
-	dop_reg(jinfo->codebuf, DP_ASR, res_hi, lho_hi, ARM_IP, SHIFT_LSL, 0);
-	dop_reg(jinfo->codebuf, DP_LSR, res_lo, lho_lo, ARM_IP, SHIFT_LSL, 0);
-	rsb_imm(jinfo->codebuf, ARM_IP, ARM_IP, 32);
-	dop_reg(jinfo->codebuf, DP_LSL, ARM_IP, lho_hi, ARM_IP, SHIFT_LSL, 0);
-	dop_reg(jinfo->codebuf, DP_ORR, res_lo, res_lo, ARM_IP, SHIFT_LSL, 0);
-	branch_narrow_patch(jinfo->codebuf, loc2);
-	break;
-      }
-
-      case opc_lmul:
-	Thumb2_lmul(jinfo);
-	break;
-
-      case opc_fadd:
-      case opc_fsub:
-      case opc_fmul:
-      case opc_fdiv:
-	Thumb2_fOp(jinfo, opcode);
-	break;
-
-      case opc_dadd:
-      case opc_dsub:
-      case opc_dmul:
-      case opc_ddiv:
-	Thumb2_dOp(jinfo, opcode);
-	break;
-
-      case opc_fcmpl:
-      case opc_fcmpg: {
-	Thumb2_Stack *jstack = jinfo->jstack;
-	unsigned rho, lho, res;
-	unsigned loc1, loc2, loc_ne;
-
-	Thumb2_Fill(jinfo, 2);
-	rho = POP(jstack);
-	lho = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	res = PUSH(jstack, JSTACK_REG(jstack));
-	vmov_reg_s_toVFP(jinfo->codebuf, VFP_S0, lho);
-	vmov_reg_s_toVFP(jinfo->codebuf, VFP_S1, rho);
-	vcmp_reg_s(jinfo->codebuf, VFP_S0, VFP_S1, 1);
-	mov_imm(jinfo->codebuf, res, opcode == opc_fcmpl ? 1 : -1);
-	vmrs(jinfo->codebuf, ARM_PC);
-	loc1 = forward_16(jinfo->codebuf);
-	dop_imm_preserve(jinfo->codebuf, DP_RSB, res, res, 0);
-	loc2 = forward_16(jinfo->codebuf);
-	vcmp_reg_s(jinfo->codebuf, VFP_S0, VFP_S1, 0);
-	loc_ne = forward_16(jinfo->codebuf);
-	mov_imm(jinfo->codebuf, res, 0);
-	bcc_patch(jinfo->codebuf, opcode == opc_fcmpl ? COND_GT : COND_MI, loc1);
-	bcc_patch(jinfo->codebuf, opcode == opc_fcmpl ? COND_MI : COND_GT, loc2);
-	bcc_patch(jinfo->codebuf, COND_NE, loc_ne);
-	break;
-      }
-
-      case opc_dcmpl:
-      case opc_dcmpg: {
-	Thumb2_Stack *jstack = jinfo->jstack;
-	unsigned rho_lo, rho_hi, lho_lo, lho_hi, res;
-	unsigned loc1, loc2, loc_ne;
-
-	Thumb2_Fill(jinfo, 4);
-	rho_lo = POP(jstack);
-	rho_hi = POP(jstack);
-	lho_lo = POP(jstack);
-	lho_hi = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	res = PUSH(jstack, JSTACK_REG(jstack));
-	vmov_reg_d_toVFP(jinfo->codebuf, VFP_S0, lho_lo, lho_hi);
-	vmov_reg_d_toVFP(jinfo->codebuf, VFP_S1, rho_lo, rho_hi);
-	vcmp_reg_d(jinfo->codebuf, VFP_S0, VFP_S1, 1);
-	mov_imm(jinfo->codebuf, res, opcode == opc_dcmpl ? 1 : -1);
-	vmrs(jinfo->codebuf, ARM_PC);
-	loc1 = forward_16(jinfo->codebuf);
-	dop_imm_preserve(jinfo->codebuf, DP_RSB, res, res, 0);
-	loc2 = forward_16(jinfo->codebuf);
-	vcmp_reg_d(jinfo->codebuf, VFP_S0, VFP_S1, 0);
-	loc_ne = forward_16(jinfo->codebuf);
-	mov_imm(jinfo->codebuf, res, 0);
-	bcc_patch(jinfo->codebuf, opcode == opc_dcmpl ? COND_GT : COND_MI, loc1);
-	bcc_patch(jinfo->codebuf, opcode == opc_dcmpl ? COND_MI : COND_GT, loc2);
-	bcc_patch(jinfo->codebuf, COND_NE, loc_ne);
-	break;
-      }
-
-      case opc_drem:
-      case opc_lrem:
-      case opc_ldiv: {
-	Reg src[4], dst[4];
-
-	Thumb2_Fill(jinfo, 4);
-	src[2] = POP(jstack);
-	src[3] = POP(jstack);
-	src[0] = POP(jstack);
-	src[1] = POP(jstack);
-	Thumb2_Flush(jinfo);
-	dst[0] = ARM_R0;
-	dst[1] = ARM_R1;
-	dst[2] = ARM_R2;
-	dst[3] = ARM_R3;
-	mov_multiple(jinfo->codebuf, dst, src, 4);
-	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
-	if (opcode != opc_lrem) {
-	  PUSH(jstack, ARM_R1);
-	  PUSH(jstack, ARM_R0);
-	} else {
-	  PUSH(jstack, ARM_R3);
-	  PUSH(jstack, ARM_R2);
-	}
-	break;
-      }
-
-      case opc_frem:
-      case opc_idiv:
-      case opc_irem: {
-	Reg r_rho, r_lho;
-
-	Thumb2_Fill(jinfo, 2);
-	r_rho = POP(jstack);
-	r_lho = POP(jstack);
-	Thumb2_Flush(jinfo);
-	if (r_rho == ARM_R0) {
-	  if (r_lho == ARM_R1) {
-	    mov_reg(jinfo->codebuf, ARM_IP, r_rho);
-	    mov_reg(jinfo->codebuf, ARM_R0, r_lho);
-	    mov_reg(jinfo->codebuf, ARM_R1, ARM_IP);
-	  } else {
-	    mov_reg(jinfo->codebuf, ARM_R1, r_rho);
-	    mov_reg(jinfo->codebuf, ARM_R0, r_lho);
-	  }
-	} else {
-	  mov_reg(jinfo->codebuf, ARM_R0, r_lho);
-	  mov_reg(jinfo->codebuf, ARM_R1, r_rho);
-	}
-	if (opcode == opc_frem)
-	  bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
-	else
-	  blx(jinfo->codebuf, OPCODE2HANDLER(opcode));
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_f2i:
-      case opc_i2f: {
-	Reg r;
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Flush(jinfo);
-	mov_reg(jinfo->codebuf, ARM_R0, r);
-	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_f2d:
-      case opc_f2l:
-      case opc_i2d: {
-	Reg r;
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Flush(jinfo);
-	mov_reg(jinfo->codebuf, ARM_R0, r);
-	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
-	PUSH(jstack, ARM_R1);
-	PUSH(jstack, ARM_R0);
-	break;
-    }
-
-      case opc_d2f:
-      case opc_d2i:
-      case opc_l2d:
-      case opc_d2l:
-      case opc_l2f: {
-	Reg lo, hi;
-
-	Thumb2_Fill(jinfo, 2);
-	lo = POP(jstack);
-	hi = POP(jstack);
-	Thumb2_Flush(jinfo);
-	if (hi == ARM_R0) {
-	  if (lo == ARM_R1) {
-	    mov_reg(jinfo->codebuf, ARM_IP, hi);
-	    mov_reg(jinfo->codebuf, ARM_R0, lo);
-	    mov_reg(jinfo->codebuf, ARM_R1, ARM_IP);
-	  } else {
-	    mov_reg(jinfo->codebuf, ARM_R1, hi);
-	    mov_reg(jinfo->codebuf, ARM_R0, lo);
-	  }
-	} else {
-	  mov_reg(jinfo->codebuf, ARM_R0, lo);
-	  mov_reg(jinfo->codebuf, ARM_R1, hi);
-	}
-	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
-	if (opcode == opc_l2d || opcode == opc_d2l) PUSH(jstack, ARM_R1);
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_ineg:
-	Thumb2_iNeg(jinfo, opcode);
-	break;
-
-      case opc_lneg:
-	Thumb2_lNeg(jinfo, opcode);
-	break;
-
-      case opc_fneg:
-	Thumb2_fNeg(jinfo, opcode);
-	break;
-
-      case opc_dneg:
-	Thumb2_dNeg(jinfo);
-	break;
-
-      case opc_i2l: {
-	unsigned r, r_res_lo, r_res_hi;
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Spill(jinfo, 2, 0);
-	r_res_hi = PUSH(jstack, JSTACK_REG(jstack));
-	r_res_lo = PUSH(jstack, JSTACK_REG(jstack));
-	if (r == r_res_hi) {
-	  SWAP(jstack);
-	  r_res_hi = r_res_lo;
-	  r_res_lo = r;
-	}
-	mov_reg(jinfo->codebuf, r_res_lo, r);
-	asr_imm(jinfo->codebuf, r_res_hi, r, 31);
-	break;
-      }
-
-      case opc_l2i: {
-	unsigned r_lo, r_hi;
-	unsigned r;
-
-	Thumb2_Fill(jinfo, 2);
-	r_lo = POP(jstack);
-	r_hi = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	r = PUSH(jstack, r_lo);
-	break;
-      }
-
-      case opc_i2b: {
-	unsigned r_src, r_dst;
-
-	Thumb2_Fill(jinfo, 1);
-	r_src = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	r_dst = PUSH(jstack, JSTACK_REG(jstack));
-	sxtb(jinfo->codebuf, r_dst, r_src);
-	break;
-      }
-
-      case opc_i2s: {
-	unsigned r_src, r_dst;
-
-	Thumb2_Fill(jinfo, 1);
-	r_src = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	r_dst = PUSH(jstack, JSTACK_REG(jstack));
-	sxth(jinfo->codebuf, r_dst, r_src);
-	break;
-      }
-
-      case opc_i2c: {
-	unsigned r_src, r_dst;
-
-	Thumb2_Fill(jinfo, 1);
-	r_src = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	r_dst = PUSH(jstack, JSTACK_REG(jstack));
-	uxth(jinfo->codebuf, r_dst, r_src);
-	break;
-      }
-
-      case opc_lcmp: {
-	unsigned lho_lo, lho_hi;
-	unsigned rho_lo, rho_hi;
-	unsigned r_tmp_lo, r_tmp_hi;
-	unsigned res;
-	unsigned loc_lt, loc_eq;
-
-	Thumb2_Fill(jinfo, 4);
-	rho_lo = POP(jstack);
-	rho_hi = POP(jstack);
-	lho_lo = POP(jstack);
-	lho_hi = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	res = JSTACK_REG(jstack);
-	PUSH(jstack, res);
-	r_tmp_lo = Thumb2_Tmp(jinfo, (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi));
-	r_tmp_hi = Thumb2_Tmp(jinfo, (1<<rho_lo)|(1<<rho_hi)|(1<<lho_lo)|(1<<lho_hi)|(1<<r_tmp_lo));
-	dop_reg(jinfo->codebuf, DP_SUB, r_tmp_lo, lho_lo, rho_lo, SHIFT_LSL, 0);
-	dop_reg(jinfo->codebuf, DP_SBC, r_tmp_hi, lho_hi, rho_hi, SHIFT_LSL, 0);
-	mov_imm(jinfo->codebuf, res, (unsigned)-1);
-	loc_lt = forward_16(jinfo->codebuf);
-	dop_reg(jinfo->codebuf, DP_ORR, res, r_tmp_lo, r_tmp_hi, SHIFT_LSL, 0);
-	loc_eq = forward_16(jinfo->codebuf);
-	mov_imm(jinfo->codebuf, res, 1);
-	bcc_patch(jinfo->codebuf, COND_LT, loc_lt);
-	bcc_patch(jinfo->codebuf, COND_EQ, loc_eq);
-	break;
-      }
-
-      case opc_iinc: {
-	unsigned local = code_base[bci+1];
-	int constant = GET_JAVA_S1(code_base+bci+2);
-	unsigned r = jinfo->jregs->r_local[local];
-
-	if (!r) {
-	  int nlocals = jinfo->method->max_locals();
-	  r = Thumb2_Tmp(jinfo, 0);
-	  stackdepth -= jstack->depth;
-	  load_local(jinfo, r, local, stackdepth);
-	  add_imm(jinfo->codebuf, r, r, constant);
-	  store_local(jinfo, r, local, stackdepth);
-	} else {
-	  Thumb2_Corrupt(jinfo, r, 0);
-	  add_imm(jinfo->codebuf, r, r, constant);
-	}
-	break;
-      }
-
-      case opc_getfield: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	Reg r_obj;
-
-        cache = cp->entry_at(index);
-        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
- 	  int java_index = GET_NATIVE_U2(code_base+bci+1);
-	  constantPoolOop pool = jinfo->method->constants();
-	  Symbol *sig = pool->signature_ref_at(java_index);
-	  const jbyte *base = sig->base();
-	  jbyte c = *base;
-	  int handler = H_GETFIELD_WORD;
-
-	  if (c == 'J' || c == 'D') handler = H_GETFIELD_DW;
-	  if (c == 'B' || c == 'Z') handler = H_GETFIELD_SB;
-	  if (c == 'C') handler = H_GETFIELD_H;
-	  if (c == 'S') handler = H_GETFIELD_SH;
-	  Thumb2_Flush(jinfo);
-	  Thumb2_save_local_refs(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
-	  break;
-	}
-
-	TosState tos_type = cache->flag_state();
-	int field_offset = cache->f2_as_index();
-
-	if (tos_type == ltos || tos_type == dtos) {
-	  Reg r_lo, r_hi;
-	  Thumb2_Fill(jinfo, 1);
-	  r_obj = POP(jstack);
-	  Thumb2_Spill(jinfo, 2, 0);
-	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
-	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
-	  Thumb2_load_long(jinfo, r_lo, r_hi, r_obj, field_offset,
-			   cache->is_volatile());
-	} else {
-	  Reg r;
-
-	  Thumb2_Fill(jinfo, 1);
-	  r_obj = POP(jstack);
-	  Thumb2_Spill(jinfo, 1, 0);
-	  r = JSTACK_REG(jstack);
-	  PUSH(jstack, r);
-	  if (tos_type == btos)
-	    ldrsb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	  else if (tos_type == ctos)
-	    ldrh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	  else if (tos_type == stos)
-	    ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	  else
-	    ldr_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	}
-
-	if (cache->is_volatile())
-	  fullBarrier(jinfo->codebuf);
-
-	break;
-      }
-
-      case opc_getstatic: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-
-        cache = cp->entry_at(index);
-        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
-	  int java_index = GET_NATIVE_U2(code_base+bci+1);
-	  constantPoolOop pool = jinfo->method->constants();
-	  Symbol *sig = pool->signature_ref_at(java_index);
-	  const jbyte *base = sig->base();
-	  jbyte c = *base;
-	  int handler = H_GETSTATIC_WORD;
-
-	  if (c == 'J' || c == 'D') handler = H_GETSTATIC_DW;
-	  if (c == 'B' || c == 'Z') handler = H_GETSTATIC_SB;
-	  if (c == 'C') handler = H_GETSTATIC_H;
-	  if (c == 'S') handler = H_GETSTATIC_SH;
-	  Thumb2_Flush(jinfo);
-	  Thumb2_save_local_refs(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
-	  break;
-	}
-
-	TosState tos_type = cache->flag_state();
-	int field_offset = cache->f2_as_index();
-	JDEBUG_( tty->print("f2_as_index getstatic %d: %s: %s %d\n", index , name->as_C_string(), sig->as_C_string(), field_offset); );
-
-	if (tos_type == ltos || tos_type == dtos) {
-	  Reg r_lo, r_hi, r_addr;
-	  Thumb2_Spill(jinfo, 2, 0);
-	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
-	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
-	  r_addr = Thumb2_Tmp(jinfo, (1<<r_hi) | (1<<r_lo));
-	  load_istate(jinfo, r_lo, ISTATE_CONSTANTS, stackdepth+2);
-	  ldr_imm(jinfo->codebuf, r_addr, r_lo, CP_OFFSET + (index << 4) + 4, 1, 0);
-	  Thumb2_load_long(jinfo, r_lo, r_hi, r_addr, field_offset,
-			   cache->is_volatile());
-	} else {
-	  Reg r;
-	  Thumb2_Spill(jinfo, 1, 0);
-	  r = JSTACK_REG(jstack);
-	  PUSH(jstack, r);
-	  load_istate(jinfo, r, ISTATE_CONSTANTS, stackdepth+1);
-	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4, 1, 0);
-	  if (tos_type == btos)
-	    ldrsb_imm(jinfo->codebuf, r, r, field_offset, 1, 0);
-	  else if (tos_type == ctos)
-	    ldrh_imm(jinfo->codebuf, r, r, field_offset, 1, 0);
-	  else if (tos_type == stos)
-	    ldrsh_imm(jinfo->codebuf, r, r, field_offset, 1, 0);
-	  else
-	    ldr_imm(jinfo->codebuf, r, r, field_offset, 1, 0);
-	}
-
-	if (cache->is_volatile())
-	  fullBarrier(jinfo->codebuf);
-
-	break;
-      }
-
-      case opc_putfield: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	Reg r_obj;
-
-        cache = cp->entry_at(index);
-
-        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
-	  int java_index = GET_NATIVE_U2(code_base+bci+1);
-	  constantPoolOop pool = jinfo->method->constants();
-	  Symbol *sig = pool->signature_ref_at(java_index);
-	  const jbyte *base = sig->base();
-	  jbyte c = *base;
-	  int handler = H_PUTFIELD_WORD;
-
-	  if (c == 'J' || c == 'D') handler = H_PUTFIELD_DW;
-	  if (c == 'B' || c == 'Z') handler = H_PUTFIELD_B;
-	  if (c == 'C' || c == 'S') handler = H_PUTFIELD_H;
- 	  if (c == '[' || c == 'L') handler = H_PUTFIELD_A;
-	  Thumb2_Flush(jinfo);
-	  Thumb2_save_local_refs(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
-
-	  break;
-	}
-
-	if (cache->is_volatile())
-	  storeBarrier(jinfo->codebuf);
-
-	TosState tos_type = cache->flag_state();
-	int field_offset = cache->f2_as_index();
-
-	if (tos_type == ltos || tos_type == dtos) {
-	  Reg r_lo, r_hi;
-	  Thumb2_Fill(jinfo, 3);
-	  r_lo = POP(jstack);
-	  r_hi = POP(jstack);
-	  r_obj = POP(jstack);
-	  Thumb2_store_long(jinfo, r_lo, r_hi, r_obj, field_offset, cache->is_volatile());
-	} else {
-	  Reg r;
-	  Thumb2_Fill(jinfo, 2);
-	  r = POP(jstack);
-	  r_obj = POP(jstack);
-	  if (tos_type == btos)
-	    strb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	  else if (tos_type == ctos | tos_type == stos)
-	    strh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	  else {
-	    str_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	    if (tos_type == atos) {
-	      Thumb2_Flush(jinfo);
-	      mov_reg(jinfo->codebuf, ARM_R0, r_obj);
-	      bl(jinfo->codebuf, handlers[H_APUTFIELD]);
-	    }
-	  }
-	}
-
-	if (cache->is_volatile())
-	  fullBarrier(jinfo->codebuf);
-
-	break;
-      }
-
-      case opc_putstatic: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-
-        cache = cp->entry_at(index);
-        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
-	  int java_index = GET_NATIVE_U2(code_base+bci+1);
-	  constantPoolOop pool = jinfo->method->constants();
-	  Symbol *sig = pool->signature_ref_at(java_index);
-	  const jbyte *base = sig->base();
-	  jbyte c = *base;
-	  int handler = H_PUTSTATIC_WORD;
-
-	  if (c == 'J' || c == 'D') handler = H_PUTSTATIC_DW;
-	  if (c == 'B' || c == 'Z') handler = H_PUTSTATIC_B;
-	  if (c == 'C' || c == 'S') handler = H_PUTSTATIC_H;
-	  if (c == '[' || c == 'L') handler = H_PUTSTATIC_A;
-	  Thumb2_Flush(jinfo);
-	  Thumb2_save_local_refs(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_local_refs(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
-	  break;
-	}
-
-	if (cache->is_volatile())
-	  storeBarrier(jinfo->codebuf);
-
-	TosState tos_type = cache->flag_state();
-	int field_offset = cache->f2_as_index();
-	Reg r_obj;
-
-	if (tos_type == ltos || tos_type == dtos) {
-	  Reg r_lo, r_hi;
-	  Thumb2_Fill(jinfo, 2);
-	  r_lo = POP(jstack);
-	  r_hi = POP(jstack);
-	  Thumb2_Spill(jinfo, 1, (1<<r_lo)|(1<<r_hi));
-	  r_obj = JSTACK_PREFER(jstack, ~((1<<r_lo)|(1<<r_hi)));
-	  JASSERT(r_obj != r_lo && r_obj != r_hi, "corruption in putstatic");
-	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-2);
-	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4, 1, 0);
-	  Thumb2_store_long(jinfo, r_lo, r_hi, r_obj, field_offset, cache->is_volatile());
-	} else {
-	  Reg r;
-	  Thumb2_Fill(jinfo, 1);
-	  r = POP(jstack);
-	  Thumb2_Spill(jinfo, 1, (1<<r));
-	  r_obj = JSTACK_PREFER(jstack, ~(1<<r));
-	  JASSERT(r_obj != r, "corruption in putstatic");
-	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-1);
-	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4, 1, 0);
-	  if (tos_type == btos)
-	    strb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	  else if (tos_type == ctos | tos_type == stos)
-	    strh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	  else {
-	    str_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	    if (tos_type == atos) {
-	      Thumb2_Flush(jinfo);
-	      mov_reg(jinfo->codebuf, ARM_R0, r_obj);
-	      bl(jinfo->codebuf, handlers[H_APUTFIELD]);
-	    }
-	  }
-	}
-
-	if (cache->is_volatile())
-	  fullBarrier(jinfo->codebuf);
-
-	break;
-      }
-
-      case opc_invokevirtual:
-      case opc_invokestatic:
-      case opc_invokespecial: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	unsigned loc;
-	methodOop callee;
-
-	// Call Debug if we're about to enter a synchronized method.
-#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
-	if (DebugSwitch && jinfo->method->is_synchronized()) {
-	  stm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-	  add_imm(jinfo->codebuf, ARM_R0, ISTATE_REG(jinfo), ISTATE_OFFSET(jinfo, stackdepth, 0));
-	  mov_imm(jinfo->codebuf, ARM_IP, (u32)Debug);
-	  load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
-	  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
-	  store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
-	  blx_reg(jinfo->codebuf, ARM_IP);
-	  ldm(jinfo->codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, POP_FD, 1);
-	}
-#undef DEBUG_REGSET
-
-        cache = cp->entry_at(index);
-        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
-	  Thumb2_Flush(jinfo);
-	  Thumb2_save_all_locals(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf,
-	    handlers[opcode == opc_invokestatic ? H_INVOKESTATIC :
-		     opcode == opc_invokespecial ? H_INVOKESPECIAL : H_INVOKEVIRTUAL]);
-	  Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
-	  break;
-	}
-
-	callee = opcode == opc_invokevirtual ? (methodOop)cache->f2_as_index() : (methodOop)cache->f1_as_instance();
-
-	if (opcode != opc_invokevirtual || cache->is_vfinal()) {
-	  if (handle_special_method(callee, jinfo, stackdepth))
-	    break;
-	}
-
-	if ((opcode != opc_invokevirtual || cache->is_vfinal()) && callee->is_accessor()) {
-	  u1 *code = callee->code_base();
-	  int index = GET_NATIVE_U2(&code[2]);
-	  constantPoolCacheOop callee_cache = callee->constants()->cache();
-	  ConstantPoolCacheEntry *entry = callee_cache->entry_at(index);
-	  Reg r_obj, r;
-
-	  if (entry->is_resolved(Bytecodes::_getfield)) {
-	    JASSERT(cache->parameter_size() == 1, "not 1 parameter to accessor");
-
-	    TosState tos_type = entry->flag_state();
-	    int field_offset = entry->f2_as_index();
-
-	    JASSERT(tos_type == btos || tos_type == ctos || tos_type == stos || tos_type == atos || tos_type == itos, "not itos or atos");
-
-	    Thumb2_Fill(jinfo, 1);
-	    r_obj = POP(jstack);
-	    Thumb2_Spill(jinfo, 1, 0);
-	    r = JSTACK_REG(jstack);
-	    PUSH(jstack, r);
-	    if (tos_type == btos)
-	      ldrb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	    else if (tos_type == ctos)
-	      ldrh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	    else if (tos_type == stos)
-	      ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	    else
-	      ldr_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	    break;
-	  }
-	}
-
- 	Thumb2_Flush(jinfo);
-	if (OSPACE) {
-	  Thumb2_save_all_locals(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf, handlers[
-	      opcode == opc_invokestatic ? H_INVOKESTATIC_RESOLVED :
-	      opcode == opc_invokespecial ? H_INVOKESPECIAL_RESOLVED :
-	      cache->is_vfinal() ? H_INVOKEVFINAL : H_INVOKEVIRTUAL_RESOLVED]);
-	  Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
-	  break;
-	}
-
-	load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
- 	mov_imm(jinfo->codebuf, ARM_R1, 0);
-	if (opcode != opc_invokestatic)
- 	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
-	if (opcode != opc_invokevirtual || cache->is_vfinal())
-	  load_istate(jinfo, ARM_R0, ISTATE_CONSTANTS, stackdepth);
-	ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	if (opcode != opc_invokestatic)
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4, 1, 0);
-	if (opcode != opc_invokevirtual || cache->is_vfinal())
-	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0,
-		CP_OFFSET + (index << 4) + (opcode == opc_invokevirtual ? 8 : 4), 1, 0);
-	else
-	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R3, INSTANCEKLASS_VTABLE_OFFSET + cache->f2_as_index() * 4, 1, 0);
-	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
- 	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
- 	ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
-	store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
- 	str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
- 	Thumb2_save_all_locals(jinfo, stackdepth);
-	sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
- 	ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
-	store_istate(jinfo, Rstack, ISTATE_STACK, stackdepth+1);
-	add_imm(jinfo->codebuf, ARM_R3, ARM_R3, FAST_ENTRY_OFFSET);
- 	blx_reg(jinfo->codebuf, ARM_R3);
- 	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
-	stackdepth = STACKDEPTH(jinfo, bc_stackinfo[bci+len]);
-	ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	load_istate(jinfo, ARM_R2, ISTATE_STACK_LIMIT, stackdepth);
- 	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-	Thumb2_restore_all_locals(jinfo, stackdepth);
-	mov_imm(jinfo->codebuf, ARM_R0, 0);   // set last SP to zero
-					      // before setting FP
-	str_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-	Thumb2_restore_all_locals(jinfo, stackdepth);
-	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
-	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
-	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP, 1, 0);
-	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_FP, 1, 0);
-	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R3, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
-	break;
-      }
-
-      case opc_invokeinterface: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	unsigned loc, loc_inc_ex;
-
-	// Currently we just call the unresolved invokeinterface entry for resolved /
-	// unresolved alike!
-	Thumb2_Flush(jinfo);
-	Thumb2_save_all_locals(jinfo, stackdepth);
-	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	mov_imm(jinfo->codebuf, ARM_R1, index);
-	blx(jinfo->codebuf, handlers[H_INVOKEINTERFACE]);
-	Thumb2_restore_all_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
-	break;
-      }
-
-    case opc_invokedynamic:
-      {
-	Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
-	break;
-      }
-
-    case opc_fast_aldc_w:
-    case opc_fast_aldc:
-      {
-	unsigned index = (opcode == (unsigned)opc_fast_aldc) ?
-				code_base[bci+1] : GET_NATIVE_U2(code_base+bci+1);
-	constantPoolOop constants = jinfo->method->constants();
-	ConstantPoolCacheEntry* cpce = constants->cache()->entry_at(index);
-        if (! cpce->is_f1_null()) {
-	  Thumb2_Spill(jinfo, 1, 0);
-	  int r = JSTACK_REG(jstack);
-	  PUSH(jstack, r);
-	  ldr_imm(jinfo->codebuf, r, Ristate, ISTATE_CONSTANTS, 1, 0);
-	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4, 1, 0); // offset to cache->f1_as_instance()
-	} else {
-	  Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
-	}
-	break;
-      }
-
-      case opc_jsr_w:
-      case opc_jsr: {
-	int offset = opcode == opc_jsr ?
-		GET_JAVA_S2(jinfo->code_base + bci + 1) :
-		GET_JAVA_U4(jinfo->code_base + bci + 1);
-	Reg r;
-
-	Thumb2_Spill(jinfo, 1, 0);
-	r = JSTACK_REG(jstack);
-	PUSH(jstack, r);
-	mov_imm(jinfo->codebuf, r, bci + ((opcode == opc_jsr) ? 3 : 5));
-	Thumb2_Flush(jinfo);
-	bci = Thumb2_Goto(jinfo, bci, offset, len);
-	len = 0;
-	break;
-      }
-
-      case opc_ret: {
-	Thumb2_Exit(jinfo, H_RET, bci, stackdepth);
-	break;
-      }
-
-      case opc_goto:
-      case opc_goto_w: {
-	int offset = opcode == opc_goto ?
-		GET_JAVA_S2(jinfo->code_base + bci + 1) :
-		GET_JAVA_U4(jinfo->code_base + bci + 1);
-	Thumb2_Flush(jinfo);
-	bci = Thumb2_Goto(jinfo, bci, offset, len, stackdepth);
-	len = 0;
-	break;
-      }
-
-      case opc_athrow:
-	Thumb2_Exit(jinfo, H_ATHROW, bci, stackdepth);
-	break;
-
-      case opc_ifeq:
-      case opc_ifne:
-      case opc_iflt:
-      case opc_ifge:
-      case opc_ifgt:
-      case opc_ifle:
-      case opc_ifnull:
-      case opc_ifnonnull: {
-	Reg r;
-	unsigned cond = opcode - opc_ifeq;
-	Thumb2_Cond_Safepoint(jinfo, stackdepth, bci);
-	if (opcode >= opc_ifnull) cond = opcode - opc_ifnull;
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Flush(jinfo);
-	cmp_imm(jinfo->codebuf, r, 0);
-	bci = Thumb2_Branch(jinfo, bci, cond);
-	len = 0;
-	break;
-      }
-
-      case opc_if_icmpeq:
-      case opc_if_icmpne:
-      case opc_if_icmplt:
-      case opc_if_icmpge:
-      case opc_if_icmpgt:
-      case opc_if_icmple:
-      case opc_if_acmpeq:
-      case opc_if_acmpne: {
-	Reg r_lho, r_rho;
-	unsigned cond = opcode - opc_if_icmpeq;
-	Thumb2_Cond_Safepoint(jinfo, stackdepth, bci);
-	if (opcode >= opc_if_acmpeq) cond = opcode - opc_if_acmpeq;
-	Thumb2_Fill(jinfo, 2);
-	r_rho = POP(jstack);
-	r_lho = POP(jstack);
-	Thumb2_Flush(jinfo);
-	cmp_reg(jinfo->codebuf, r_lho, r_rho);
-	bci = Thumb2_Branch(jinfo, bci, cond);
-	len = 0;
-	break;
-      }
-
-      case opc_return:
-      case opc_dreturn:
-      case opc_lreturn:
-      case opc_ireturn:
-      case opc_freturn:
-      case opc_areturn:
-	Thumb2_Return(jinfo, opcode, bci, stackdepth);
-	break;
-
-      case opc_return_register_finalizer: {
-	Thumb2_Stack *jstack = jinfo->jstack;
-	Reg r, r_tmp;
-	unsigned loc_eq;
-
-	Thumb2_Flush(jinfo);
-	Thumb2_Load(jinfo, 0, stackdepth);
-	r = POP(jstack);
-	r_tmp = Thumb2_Tmp(jinfo, (1<<r));
-	ldr_imm(jinfo->codebuf, r_tmp, r, 4, 1, 0);
-	ldr_imm(jinfo->codebuf, r_tmp, r_tmp, KLASS_PART+KLASS_ACCESSFLAGS, 1, 0);
-	tst_imm(jinfo->codebuf, r_tmp, JVM_ACC_HAS_FINALIZER);
-	loc_eq = forward_16(jinfo->codebuf);
-	Thumb2_save_local_refs(jinfo, stackdepth);
-	mov_reg(jinfo->codebuf, ARM_R1, r);
-	load_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
-	ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-	add_imm(jinfo->codebuf, ARM_R0, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	store_istate(jinfo, ARM_R0, ISTATE_BCP, stackdepth);
-	sub_imm(jinfo->codebuf, ARM_R0, Rstack, 4);
-	store_istate(jinfo, ARM_R0, ISTATE_STACK, stackdepth);
-
-	mov_reg(jinfo->codebuf, ARM_R0, Rthread);
-	mov_imm(jinfo->codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc);
-	blx_reg(jinfo->codebuf, ARM_R3);
-
-	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R3, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	bcc_patch(jinfo->codebuf, COND_EQ, loc_eq);
-	Thumb2_Return(jinfo, opc_return, bci, stackdepth);
-	break;
-      }
-
-      case opc_new: {
-	unsigned loc;
-
-	Thumb2_Flush(jinfo);
-	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
-	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
-      Thumb2_save_local_refs(jinfo, stackdepth);
-	bl(jinfo->codebuf, handlers[H_NEW]);
-      Thumb2_restore_local_refs(jinfo, stackdepth);
-	cmp_imm(jinfo->codebuf, ARM_R0, 0);
-	it(jinfo->codebuf, COND_EQ, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_aastore: {
-	Reg src[3], dst[3];
-	unsigned loc;
-
-	Thumb2_Fill(jinfo, 3);
-	src[0] = POP(jstack);	// value
-	src[1] = POP(jstack);	// index
-	src[2] = POP(jstack);	// arrayref
-	Thumb2_Flush(jinfo);
-	dst[0] = ARM_R1;
-	dst[1] = ARM_R2;
-	dst[2] = ARM_R3;
-	mov_multiple(jinfo->codebuf, dst, src, 3);
-	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-      Thumb2_save_local_refs(jinfo, stackdepth - 3);	// 3 args popped above
-	bl(jinfo->codebuf, handlers[H_AASTORE]);
-      Thumb2_restore_local_refs(jinfo, stackdepth - 3);
-	cmp_imm(jinfo->codebuf, ARM_R0, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	break;
-      }
-
-      case opc_instanceof: {
-	unsigned loc;
-	Reg r;
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Flush(jinfo);
-	mov_reg(jinfo->codebuf, ARM_R2, r);
-	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
-	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
-      Thumb2_save_local_refs(jinfo, stackdepth - 1);
-	bl(jinfo->codebuf, handlers[H_INSTANCEOF]);
-      Thumb2_restore_local_refs(jinfo, stackdepth - 1);	// 1 arg popped above
-	cmp_imm(jinfo->codebuf, ARM_R0, (unsigned)-1);
-	it(jinfo->codebuf, COND_EQ, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_checkcast: {
-	unsigned loc;
-	Reg r;
-
-	Thumb2_Fill(jinfo, 1);
-	r = TOS(jstack);
-	Thumb2_Flush(jinfo);
-	mov_reg(jinfo->codebuf, ARM_R2, r);
-	mov_imm(jinfo->codebuf, ARM_R1, GET_JAVA_U2(code_base+bci+1));
-	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
-      Thumb2_save_local_refs(jinfo, stackdepth);
-	bl(jinfo->codebuf, handlers[H_CHECKCAST]);
-      Thumb2_restore_local_refs(jinfo, stackdepth);
-	cmp_imm(jinfo->codebuf, ARM_R0, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	break;
-      }
-
-      case opc_monitorenter:
-	Thumb2_Flush(jinfo);
-	Thumb2_save_all_locals(jinfo, stackdepth);
-	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	bl(jinfo->codebuf, handlers[H_MONITORENTER]);
-	Thumb2_restore_all_locals(jinfo, stackdepth);
-	break;
-
-      case opc_monitorexit: {
-	Reg r;
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Flush(jinfo);
-	mov_reg(jinfo->codebuf, ARM_R1, r);
-	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
-        Thumb2_save_local_refs(jinfo, stackdepth);
-	bl(jinfo->codebuf, handlers[H_MONITOREXIT]);
-        Thumb2_restore_local_refs(jinfo, stackdepth);
-	cmp_imm(jinfo->codebuf, ARM_R0, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	break;
-      }
-
-      case opc_newarray: {
-	Reg r;
-	unsigned loc;
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Flush(jinfo);
-	mov_reg(jinfo->codebuf, ARM_R2, r);
-	mov_imm(jinfo->codebuf, ARM_R1, code_base[bci+1]);
-	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
-      Thumb2_save_local_refs(jinfo, stackdepth-1);
-	bl(jinfo->codebuf, handlers[H_NEWARRAY]);
-      Thumb2_restore_local_refs(jinfo, stackdepth-1);
-	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT, 1, 0);
-	mov_imm(jinfo->codebuf, ARM_R2, 0);
-  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R0, 0);
-	it(jinfo->codebuf, COND_EQ, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_anewarray: {
-	Reg r;
-	unsigned loc;
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-	Thumb2_Flush(jinfo);
-	mov_reg(jinfo->codebuf, ARM_R3, r);
-	mov_imm(jinfo->codebuf, ARM_R2, GET_JAVA_U2(code_base+bci+1));
-	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-      Thumb2_save_local_refs(jinfo, stackdepth-1);
-	bl(jinfo->codebuf, handlers[H_ANEWARRAY]);
-      Thumb2_restore_local_refs(jinfo, stackdepth-1);
-	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT, 1, 0);
-	mov_imm(jinfo->codebuf, ARM_R2, 0);
-  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R0, 0);
-	it(jinfo->codebuf, COND_EQ, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_multianewarray: {
-	unsigned loc;
-
-	Thumb2_Flush(jinfo);
-	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	mov_imm(jinfo->codebuf, ARM_R1, code_base[bci+3] * 4);
-      Thumb2_save_local_refs(jinfo, stackdepth);
-	bl(jinfo->codebuf, handlers[H_MULTIANEWARRAY]);
-      Thumb2_restore_local_refs(jinfo, stackdepth - code_base[bci+3]);
-	ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT, 1, 0);
-	mov_imm(jinfo->codebuf, ARM_R2, 0);
-  	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_VM_RESULT, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R0, 0);
-	it(jinfo->codebuf, COND_EQ, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
-	PUSH(jstack, ARM_R0);
-	break;
-      }
-
-      case opc_arraylength: {
-	Reg r_obj, r_len;
-
-	Thumb2_Fill(jinfo, 1);
-	r_obj = POP(jstack);
-	Thumb2_Spill(jinfo, 1, 0);
-	r_len = JSTACK_REG(jstack);
-	PUSH(jstack, r_len);
-	ldr_imm(jinfo->codebuf, r_len, r_obj, 8, 1, 0);
-	break;
-      }
-
-      case opc_lookupswitch: {
-	unsigned w;
-	unsigned nbci;
-	int def;
-	int npairs;	// The Java spec says signed but must be >= 0??
-	unsigned *table, *tablep;
-	unsigned r;
-	unsigned oldidx;
-	unsigned table_loc;
-	int i;
-
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = bci + (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 8);
-	npairs = (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 12);
-
-	Thumb2_Fill(jinfo, 1);
-	r = POP(jstack);
-
-	Thumb2_Flush(jinfo);
-
-	table_loc = out_loc(jinfo->codebuf);
-	for (i = 0, tablep = table; i < npairs; i++) {
-	  unsigned match;
-
-	  w = tablep[0];
-	  match = BYTESEX_REVERSE(w);
-	  tablep += 2;
-	  cmp_imm(jinfo->codebuf, r, match);
-	  t2_bug_align(jinfo->codebuf);
-	  forward_32(jinfo->codebuf);
-	}
-	t2_bug_align(jinfo->codebuf);
-	forward_32(jinfo->codebuf);
-	Thumb2_codegen(jinfo, bci+len);
-
-	oldidx = codebuf->idx;
-	codebuf->idx = table_loc >> 1;
-	for (i = 0, tablep = table; i < npairs; i++) {
-	  unsigned match;
-	  unsigned dest;
-	  unsigned loc;
-
-	  w = tablep[0];
-	  match = BYTESEX_REVERSE(w);
-	  w = tablep[1];
-	  dest = bci + (int)BYTESEX_REVERSE(w);
-	  tablep += 2;
-	  cmp_imm(jinfo->codebuf, r, match);
-	  JASSERT(jinfo->bc_stackinfo[dest] & BC_COMPILED, "code not compiled");
-	  t2_bug_align(jinfo->codebuf);
-	  loc = forward_32(jinfo->codebuf);
-	  branch_patch(jinfo->codebuf, COND_EQ, loc, jinfo->bc_stackinfo[dest] & ~BC_FLAGS_MASK);
-	}
-	JASSERT(jinfo->bc_stackinfo[def] & BC_COMPILED, "default in lookupswitch not compiled");
-	t2_bug_align(jinfo->codebuf);
-	branch_uncond_patch(jinfo->codebuf, out_loc(jinfo->codebuf), jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
-	codebuf->idx = oldidx;
-
-	bci = (unsigned)-1;
-	len = 0;
-
-	break;
-      }
-
-      case opc_tableswitch: {
-	int low, high, i;
-	unsigned w;
-	unsigned *table, *tablep;
-	unsigned nbci;
-	int def;
-	unsigned loc, table_loc;
-	unsigned r, rs;
-	unsigned oldidx;
-	unsigned negative_offsets, negative_branch_table;
-
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 8);
-	low = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 12);
-	high = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = bci + (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 16);
-
-	Thumb2_Fill(jinfo, 1);
-	rs = POP(jstack);
-	Thumb2_Flush(jinfo);
-	r = Thumb2_Tmp(jinfo, (1<<rs));
-	sub_imm(jinfo->codebuf, r, rs, low);
-	cmp_imm(jinfo->codebuf, r, (high-low)+1);
-	loc = 0;
-	if (jinfo->bc_stackinfo[def] & BC_COMPILED)
-	  branch(jinfo->codebuf, COND_CS, jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
-	else
-	  loc = forward_32(jinfo->codebuf);
-	tbh(jinfo->codebuf, ARM_PC, r);
-	table_loc = out_loc(jinfo->codebuf);
-	negative_offsets = 0;
-	for (i = low, tablep = table; i <= high; i++) {
-	  int offset;
-	  w = *tablep++;
-	  offset = (int)BYTESEX_REVERSE(w);
-	  if (offset < 0) negative_offsets++;
-	  out_16(jinfo->codebuf, 0);
-	}
-	negative_branch_table = out_loc(jinfo->codebuf);
-	for (i = 0; i < (int)negative_offsets; i++) {
-	  t2_bug_align(jinfo->codebuf);
-	  out_16x2(jinfo->codebuf, 0);
-	}
-
-	Thumb2_codegen(jinfo, bci+len);
-
-	if (loc) {
-	  JASSERT(jinfo->bc_stackinfo[def] & BC_COMPILED, "def not compiled in tableswitch");
-	  branch_patch(jinfo->codebuf, COND_CS, loc, jinfo->bc_stackinfo[def] & ~BC_FLAGS_MASK);
-	}
-
-	oldidx = codebuf->idx;
-	codebuf->idx = table_loc >> 1;
-	for (i = low, tablep = table; i <= high; i++) {
-	  unsigned dest;
-	  int offset;
-
-	  w = *tablep++;
-	  offset = (int)BYTESEX_REVERSE(w);
-	  dest = bci + offset;
-	  JASSERT(jinfo->bc_stackinfo[dest] & BC_COMPILED, "code not compiled");
-	  dest = jinfo->bc_stackinfo[dest] & ~BC_FLAGS_MASK;
-	  if (offset < 0) {
-	    unsigned oldidx;
-	    out_16(jinfo->codebuf, (negative_branch_table >> 1) - (table_loc >> 1));
-	    PATCH(negative_branch_table) {
-	      t2_bug_align(jinfo->codebuf);
-	      branch_uncond_patch(jinfo->codebuf, out_loc(jinfo->codebuf), dest);
-	      negative_branch_table = out_loc(jinfo->codebuf);
-	    } HCTAP;
-	  } else {
-	    JASSERT((dest & 1) == 0 && (table_loc & 1) == 0, "unaligned code");
-	    offset = (dest >> 1) - (table_loc >> 1);
-	    if (offset >= 65536) {
-	      longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
-	    }
-	    out_16(jinfo->codebuf, offset);
-	  }
-	}
-	codebuf->idx = oldidx;
-	bci = (unsigned)-1;
-	len = 0;
-	break;
-      }
-
-      case opc_wide: {
-	unsigned local = GET_JAVA_U2(code_base + bci + 2);
-	opcode = code_base[bci+1];
-	if (opcode == opc_iinc) {
-	  int constant = GET_JAVA_S2(code_base + bci + 4);
-	  unsigned r = jinfo->jregs->r_local[local];
-	  
-	  if (!r) {
-	    int nlocals = jinfo->method->max_locals();
-	    r = ARM_IP;
-	    stackdepth -= jstack->depth;
-	    load_local(jinfo, r, local, stackdepth);
-	    add_imm(jinfo->codebuf, r, r, constant);
-	    store_local(jinfo, r, local, stackdepth);
-	  } else {
-	    Thumb2_Corrupt(jinfo, r, 0);
-	    add_imm(jinfo->codebuf, r, r, constant);
-	  }
-	} else if (opcode == opc_ret) {
-	  Thumb2_Exit(jinfo, H_RET, bci, stackdepth);
-	} else {
-	  if (opcode == opc_iload ||
-	  	opcode == opc_fload || opcode == opc_aload)
-	    Thumb2_Load(jinfo, local, stackdepth);
-	  else if (opcode == opc_lload || opcode == opc_dload)
-	    Thumb2_LoadX2(jinfo, local, stackdepth);
-	  else if (opcode == opc_istore ||
-	  	opcode == opc_fstore || opcode == opc_astore)
-	    Thumb2_Store(jinfo, local, stackdepth);
-	  else if (opcode == opc_lstore || opcode == opc_dstore)
-	    Thumb2_StoreX2(jinfo, local, stackdepth);
-	  else fatal(err_msg("Undefined wide opcode %d\n", opcode));
-	}
-	break;
-      }
-
-      default:
-	JASSERT(0, "unknown bytecode");
-	break;
-    }
-    bci += len;
-#ifdef T2_PRINT_DISASS
-    if (len == 0) {
-      if (start_idx == jinfo->codebuf->idx) start_bci[start_idx] = -1;
-    } else
-      end_bci[start_idx] = bci;
-#endif
-  }
-}
-
-#define BEG_BCI_OFFSET		0
-#define END_BCI_OFFSET		1
-#define HANDLER_BCI_OFFSET	2
-#define KLASS_INDEX_OFFSET	3
-#define ENTRY_SIZE		4
-
-extern "C" int Thumb2_lr_to_bci(unsigned lr, methodOop method, Reg *regs, unsigned *locals)
-{
-  Compiled_Method *cmethod = compiled_method_list;
-  ExceptionTable table(method);
-  constantPoolOop pool = method->constants();
-  int length = table.length();
-
-  while (cmethod) {
-    unsigned *exception_table = cmethod->exception_table;
-    if (exception_table) {
-      unsigned code_base = (unsigned)cmethod;
-      if (code_base <= lr && lr <= (unsigned)exception_table) {
-	int exception_index = -1;
-	unsigned exception_found = 0;
-
-	for (int i = 0; i < length; i++) {
-	  unsigned offsets = *exception_table++;
-	  unsigned exc_beg = code_base + ((offsets >> 16) << 1);
-	  unsigned exc_end = code_base + ((offsets & 0xffff) << 1);
-
-	  if (exc_beg <= lr && lr <= exc_end) {
-	    if (exc_beg > exception_found) {
-	      // With nested try catch blocks, choose the most deeply nested
-	      exception_found = exc_beg;
-	      exception_index = i;
-	    }	    
-	  }
-	  if (exception_index >= 0) {
-	    if (regs) {
-	      for (unsigned i = 0; i < PREGS; i++) {
-		int local = cmethod->regusage[i];
-		if (local >= 0) {
-		  locals[-local] = regs[i];
-		}
-	      }
-	    }
-	    return table.start_pc(exception_index);
-	  }
-	}
-      }
-    }
-    cmethod = cmethod->next;
-  }
-  return -1;
-}
-
-void Thumb2_generate_exception_table(Compiled_Method *cmethod, Thumb2_Info *jinfo)
-{
-  methodOop method = jinfo->method;
-  ExceptionTable table(method);
-  constantPoolOop pool = method->constants();
-  int length = table.length();
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-
-  cmethod->exception_table = (unsigned *)out_pos(jinfo->codebuf);
-  for (int i = 0; i < length; i++) {
-    int beg_bci = table.start_pc(i);
-    int end_bci = table.end_pc(i);
-    unsigned stackinfo;
-    unsigned beg_offset, end_offset;
-
-    stackinfo = bc_stackinfo[beg_bci];
-    beg_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
-    stackinfo = bc_stackinfo[end_bci];
-    end_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
-    if (!(beg_offset != 0 && end_offset >= beg_offset && end_offset < 65536)) {
-	longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
-    }
-    out_32(jinfo->codebuf, (beg_offset << 16) | (end_offset));
-  }
-}
-
-void Thumb2_tablegen(Compiled_Method *cmethod, Thumb2_Info *jinfo)
-{
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-  unsigned bci;
-  unsigned count = 0;
-  unsigned i;
-  CodeBuf *codebuf = jinfo->codebuf;
-
-  cmethod->osr_table = (unsigned *)out_pos(jinfo->codebuf);
-  out_32(codebuf, 0);
-  bc_stackinfo[0] |= BC_BACK_TARGET;
-  for (bci = 0; bci < code_size;) {
-    unsigned stackinfo = bc_stackinfo[bci];
-    unsigned bytecodeinfo;
-    unsigned opcode;
-
-    if (stackinfo & BC_BACK_TARGET) {
-      unsigned code_offset = (stackinfo & ~BC_FLAGS_MASK) >> 1;
-      JASSERT(stackinfo & BC_COMPILED, "back branch target not compiled???");
-      if (code_offset >= 65536) {
-	longjmp(compiler_error_env, COMPILER_RESULT_FAILED);
-      }
-//      JASSERT(code_offset < (1<<16), "oops, codesize too big");
-      out_32(codebuf, (bci << 16) | code_offset);
-      count++;
-    }
-
-    opcode = code_base[bci];
-    bytecodeinfo = bcinfo[opcode];
-    if (!BCI_SPECIAL(bytecodeinfo)) {
-      bci += BCI_LEN(bytecodeinfo);
-      continue;
-    } else {
-      int len = Bytecodes::length_for((Bytecodes::Code)opcode);
-      if (len <= 0) {
-	Bytecodes::Code code = Bytecodes::code_at(NULL, (address)(code_base+bci));
-	len = (Bytecodes::special_length_at
-	       (code,
-		(address)(code_base+bci), (address)(code_base+code_size)));
-      }
-      bci += len;
-    }
-  }
-  *cmethod->osr_table = count;
-  if (jinfo->method->has_exception_handler())
-    Thumb2_generate_exception_table(cmethod, jinfo);
-}
-
-extern "C" void Thumb2_Clear_Cache(char *base, char *limit);
-#define IS_COMPILED(e, cb) ((e) >= (unsigned)(cb) && (e) < (unsigned)(cb) + (cb)->size)
-
-unsigned Thumb2_osr_from_bci(Compiled_Method *cmethod, unsigned bci)
-{
-  unsigned *osr_table;
-  unsigned count;
-  unsigned i;
-
-  osr_table = cmethod->osr_table;
-  if (!osr_table) return 0;
-  count = *osr_table++;
-  for (i = 0; i < count; i++) {
-    unsigned u = *osr_table++;
-
-    if (bci == (u>>16)) return (u & 0xffff) << 1;
-  }
-  return 0;
-}
-
-extern "C" void Debug_Stack(intptr_t *stack)
-{
-  int i;
-  char msg[16];
-
-  tty->print("  Stack:");
-  for (i = 0; i < 6; i++) {
-    tty->print(" [");
-    sprintf(msg, "%d", i);
-    tty->print(msg);
-    tty->print("] = ");
-    sprintf(msg, "%08x", (int)stack[i]);
-    tty->print(msg);
-  }
-  tty->cr();
-}
-
-extern "C" void Debug_MethodEntry(interpreterState istate, intptr_t *stack, methodOop callee)
-{
- JDEBUG_(
-  if (DebugSwitch) {
-    methodOop method = istate->method();
-    tty->print("Entering ");
-    callee->print_short_name(tty);
-    tty->print(" from ");
-    method->print_short_name(tty);
-    tty->cr();
-    Debug_Stack(stack);
-    tty->flush();
-  }
- )
-}
-
-extern "C" void Debug_MethodExit(interpreterState istate, intptr_t *stack)
-{
- JDEBUG_(
-  if (DebugSwitch) {
-    methodOop method = istate->method();
-    JavaThread *thread = istate->thread();
-    oop exc = thread->pending_exception();
-
-    if (!exc) return;
-    tty->print("Leaving ");
-    method->print_short_name(tty);
-    tty->cr();
-    Debug_Stack(stack);
-    tty->flush();
-    if (exc) tty->print_cr("Exception %s", exc->print_value_string());
-  }
-)
-}
-
-extern "C" void Debug_MethodCall(interpreterState istate, intptr_t *stack, methodOop callee)
-{
- JDEBUG_(
-  if (DebugSwitch) {
-    methodOop method = istate->method();
-    tty->print("Calling ");
-    callee->print_short_name(tty);
-    tty->print(" from ");
-    method->print_short_name(tty);
-    tty->cr();
-    Debug_Stack(stack);
-    tty->flush();
-  }
- )
-}
-extern "C" void Thumb2_Install(methodOop mh, u32 entry);
-
-extern "C" unsigned cmpxchg_ptr(unsigned new_value, volatile unsigned *ptr, unsigned cmp_value);
-static volatile unsigned compiling;
-static unsigned CompileCount = 0;
-static unsigned MaxCompile = 10000;
-
-#define COMPILE_ONLY	0
-#define COMPILE_COUNT	0
-#define DISASS_AFTER	0
-//#define COMPILE_LIST
-
-#ifdef COMPILE_LIST
-static const char *compile_list[] = {
-	0
-};
-#endif
-
-static unsigned compiled_methods = 0;
-
-#ifdef T2_PRINT_STATISTICS
-static unsigned bytecodes_compiled = 0;
-static unsigned arm_code_generated = 0;
-static clock_t total_compile_time = 0;
-#endif
-
-extern unsigned CPUInfo;
-
-extern "C" unsigned long long Thumb2_Compile(JavaThread *thread, unsigned branch_pc)
-{
-  HandleMark __hm(thread);
-  frame fr = thread->last_frame();
-  methodOop method = fr.interpreter_frame_method();
-  Symbol *name = method->name();
-  Symbol *sig = method->signature();
-  const jbyte *base = sig->base();
-
-  jubyte *code_base = (jubyte *)method->code_base();
-  int code_size = method->code_size();
-  InvocationCounter* ic = method->invocation_counter();
-  InvocationCounter* bc = method->backedge_counter();
-  Thumb2_Info jinfo_str;
-  CodeBuf codebuf_str;
-  Thumb2_Stack jstack_str;
-  Thumb2_Registers jregs_str;
-  int idx;
-  u32 code_handle, slow_entry;
-  Thumb2_CodeBuf *cb = thumb2_codebuf;
-  int rc;
-  char *saved_hp;
-  Compiled_Method *cmethod;
-  u32 compiled_offset;
-  Thumb2_Entrypoint thumb_entry;
-  int compiled_accessor;
-
-  if (!(CPUInfo & ARCH_THUMB2))
-	UseCompiler = false;
-
-  {
-    bool ignore;
-    methodHandle mh(thread, method);
-    if (!UseCompiler || method->is_not_compilable()
-	|| CompilerOracle::should_exclude(mh, ignore)) {
-      ic->set(ic->state(), 1);
-      bc->set(ic->state(), 1);
-      return 0;
-    }
-  }
-
-  slow_entry = *(unsigned *)method->from_interpreted_entry();
-  if (IS_COMPILED(slow_entry, cb)) {
-    cmethod = (Compiled_Method *)(slow_entry & ~TBIT);
-    compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
-    if (compiled_offset == 0) return 0;
-    thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
-    thumb_entry.regusage = cmethod->regusage;
-    return *(unsigned long long *)&thumb_entry;
-  }
-
-  ic->decay();
-  bc->decay();
-
-  // Dont compile anything with code size >= 32K.
-  // We rely on the bytecode index fitting in 16 bits
-  //
-  // Dont compile anything with max stack + maxlocal > 1K
-  // The range of an LDR in T2 is -4092..4092
-  // Othersize we have difficulty access the locals from the stack pointer
-  //
-  if (code_size > THUMB2_MAX_BYTECODE_SIZE ||
-		(method->max_locals() + method->max_stack()) >= 1000) {
-        method->set_not_compilable();
-	return 0;
-  }
-
-  if (COMPILE_COUNT && compiled_methods == COMPILE_COUNT) return 0;
-
-  if (COMPILE_ONLY) {
-    if (strcmp(name->as_C_string(), COMPILE_ONLY) != 0) return 0;
-  }
-
-#ifdef COMPILE_LIST
-  {
-	const char **argv = compile_list;
-	const char *s;
-	while (s = *argv++) {
-		if (strcmp(s, method->name_and_sig_as_C_string()) == 0)
-			break;
-	}
-	if (!s) {
-		method->set_not_compilable();
-		return 0;
-	}
-  }
-#endif
-
-  saved_hp = cb->hp;
-  if (rc = setjmp(compiler_error_env)) {
-    cb->hp = saved_hp;
-    if (rc == COMPILER_RESULT_FAILED)
-        method->set_not_compilable();
-    if (rc == COMPILER_RESULT_FATAL)
-	UseCompiler = false;
-    compiling = 0;
-    return 0;
-  }
-
-  if (cmpxchg_ptr(1, &compiling, 0)) return 0;
-
-#ifdef T2_PRINT_STATISTICS
-  clock_t compile_time = clock();
-#endif
-
-#ifdef T2_PRINT_COMPILATION
-  if (PrintCompilation || PrintAssembly) {
-    fprintf(stderr, "Compiling %d %c%c %s\n",
-	compiled_methods,
-	method->is_synchronized() ? 'S' : ' ',
-	method->has_exception_handler() ? 'E' : ' ',
-	method->name_and_sig_as_C_string());
-  }
-#endif
-
-  memset(bc_stackinfo, 0, code_size * sizeof(unsigned));
-  memset(locals_info, 0, method->max_locals() * sizeof(unsigned));
-#ifdef T2_PRINT_DISASS
-  memset(start_bci, 0xff, sizeof(start_bci));
-  memset(end_bci, 0xff, sizeof(end_bci));
-#endif
-
-#ifdef THUMB2_JVMTI
-  address_bci_map_reset(thread);
-#endif // THUMB2_JVMTI
-
-  jinfo_str.thread = thread;
-  jinfo_str.method = method;
-  jinfo_str.code_base = code_base;
-  jinfo_str.code_size = code_size;
-  jinfo_str.bc_stackinfo = bc_stackinfo;
-  jinfo_str.locals_info = locals_info;
-  jinfo_str.compiled_return = 0;
-  for (int i = 0; i < 12; i++) jinfo_str.compiled_word_return[i] = 0;
-  jinfo_str.is_leaf = 1;
-  jinfo_str.use_istate = method->has_monitor_bytecodes();
-
-  Thumb2_local_info_from_sig(&jinfo_str, method, base);
-
-  Thumb2_pass1(&jinfo_str, 0, 0);
-
-  codebuf_str.codebuf = (unsigned short *)cb->hp;
-  codebuf_str.idx = 0;
-  codebuf_str.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
-
-  jstack_str.stack = stack;
-  jstack_str.depth = 0;
-
-  memset(r_local, 0, method->max_locals() * sizeof(unsigned));
-
-  jregs_str.r_local = r_local;
-
-  jinfo_str.codebuf = &codebuf_str;
-  jinfo_str.jstack = &jstack_str;
-  jinfo_str.jregs = &jregs_str;
-
-  jregs_str.pregs[0] = JAZ_V1;
-  jregs_str.pregs[1] = JAZ_V2;
-  jregs_str.pregs[2] = JAZ_V3;
-  jregs_str.pregs[3] = JAZ_V4;
-  jregs_str.pregs[4] = JAZ_V5;
-  jregs_str.pregs[5] = JAZ_V6;
-
-  jregs_str.npregs = PREGS;
-
-  Thumb2_RegAlloc(&jinfo_str);
-
-  slow_entry = out_align_offset(&codebuf_str, CODE_ALIGN, SLOW_ENTRY_OFFSET);
-  cmethod = (Compiled_Method *)slow_entry;
-  slow_entry |= TBIT;
-
-  cb->hp += codebuf_str.idx * 2;
-  codebuf_str.codebuf = (unsigned short *)cb->hp;
-  codebuf_str.idx = 0;
-  codebuf_str.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
-
-  compiled_accessor = 1;
-  if (!method->is_accessor() || !Thumb2_Accessor(&jinfo_str)) {
-    Thumb2_Enter(&jinfo_str);
-    Thumb2_codegen(&jinfo_str, 0);
-    compiled_accessor = 0;
-  }
-
-#ifdef T2_PRINT_DISASS
-  if (DISASS_AFTER == 0 || compiled_methods >= DISASS_AFTER)
-    if (PrintAssembly)
-      Thumb2_disass(&jinfo_str);
-#endif
-
-  for (int i = 0; i < PREGS; i++)
-    cmethod->regusage[i] = jregs_str.mapping[i];
-
-  Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf_str.idx * 2);
-
-#ifdef T2_PRINT_STATISTICS
-  compile_time = clock() - compile_time;
-  total_compile_time += compile_time;
-
-  if (t2_print_statistics) {
-    unsigned codegen = codebuf_str.idx * 2;
-    bytecodes_compiled += code_size;
-    arm_code_generated += codegen;
-    fprintf(stderr, "%d bytecodes => %d bytes code in %.2f sec, totals: %d => %d in %.2f sec\n",
-      code_size, codegen, (double)compile_time/(double)CLOCKS_PER_SEC,
-    bytecodes_compiled, arm_code_generated, (double)total_compile_time/(double)CLOCKS_PER_SEC);
-  }
-#endif
-
-  code_handle = out_align(&codebuf_str, sizeof(address));
-
-  out_32(&codebuf_str, slow_entry);
-
-  if (!compiled_accessor)
-    Thumb2_tablegen(cmethod, &jinfo_str);
-
-  cb->hp += codebuf_str.idx * 2;
-
-  *compiled_method_list_tail_ptr = cmethod;
-  compiled_method_list_tail_ptr = &(cmethod->next);
-
-  Thumb2_Install(method, code_handle);
-
-  compiled_methods++;
-
-  compiling = 0;
-
-  compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
-  if (compiled_offset == 0) return 0;
-  thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
-  thumb_entry.regusage = cmethod->regusage;
-
-#ifdef THUMB2_JVMTI
-  {
-    // we need to dispatch a compiled_method_load event
-    // to all registered Jvmti agents
-
-    // notify the whole generated code region for this Java method
-    // from slow_entry through to the end of the osr table. some
-    // of it is data not code but that's not a problem.
-
-    const void *gen_code_start = (const void *)(slow_entry ^ TBIT);
-    unsigned gen_code_size = codebuf_str.idx * 2;
-
-    // address_bci_map translates start addresses for generated code
-    // sections to bytecode indices and contains address_bci_map_length
-    // entries
-
-    // the final compile_info argument is supposed to contain
-    // information about inlined code. we can supply NULL for now -
-    // oprofile doesn't use it anyway
-
-    void *compile_info = NULL;
-
-    // transition from in Java to in VM before calling into Jvmti
-    ThreadInVMfromJava transition(thread);
-
-    JvmtiExport::post_compiled_method_load(method, gen_code_size,
-		gen_code_start, address_bci_map_length,
-		address_bci_map, NULL);
-  }
-#endif // THUMB2_JVMTI
-
-  return *(unsigned long long *)&thumb_entry;
-}
-
-extern "C" void Thumb2_DivZero_Handler(void);
-extern "C" void Thumb2_ArrayBounds_Handler(void);
-extern "C" void Thumb2_Handle_Exception(void);
-extern "C" void Thumb2_Handle_Exception_NoRegs(void);
-extern "C" void Thumb2_Exit_To_Interpreter(void);
-extern "C" void Thumb2_Stack_Overflow(void);
-extern "C" void Thumb2_monitorenter(void);
-
-extern "C" void __divsi3(void);
-extern "C" void __aeabi_ldivmod(void);
-extern "C" void __aeabi_i2f(void);
-extern "C" void __aeabi_i2d(void);
-extern "C" void __aeabi_l2f(void);
-extern "C" void __aeabi_l2d(void);
-extern "C" void __aeabi_f2d(void);
-extern "C" void __aeabi_d2f(void);
-extern "C" void Helper_new(void);
-extern "C" void Helper_instanceof(void);
-extern "C" void Helper_checkcast(void);
-extern "C" void Helper_monitorexit(void);
-extern "C" void Helper_aastore(void);
-extern "C" void Helper_aputfield(void);
-extern "C" void Helper_synchronized_enter(void);
-extern "C" void Helper_synchronized_exit(void);
-extern "C" void Helper_SafePoint(void);
-
-extern "C" void _ZN13SharedRuntime3f2iEf(void);
-extern "C" void _ZN13SharedRuntime3f2lEf(void);
-extern "C" void _ZN13SharedRuntime3d2iEd(void);
-extern "C" void _ZN13SharedRuntime3d2lEd(void);
-extern "C" void _ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei(void);
-extern "C" void _ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii(void);
-extern "C" void _ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi(void);
-extern "C" void _ZN18InterpreterRuntime3ldcEP10JavaThreadb(void);
-
-extern char Thumb2_stubs[];
-extern char Thumb2_stubs_end[];
-extern char Thumb2_idiv_stub[];
-extern char Thumb2_irem_stub[];
-extern char Thumb2_invokeinterface_stub[];
-extern char Thumb2_invokevirtual_stub[];
-extern char Thumb2_invokestatic_stub[];
-extern char Thumb2_invokespecial_stub[];
-extern char Thumb2_getfield_word_stub[];
-extern char Thumb2_getfield_sh_stub[];
-extern char Thumb2_getfield_h_stub[];
-extern char Thumb2_getfield_sb_stub[];
-extern char Thumb2_getfield_dw_stub[];
-extern char Thumb2_putfield_word_stub[];
-extern char Thumb2_putfield_h_stub[];
-extern char Thumb2_putfield_b_stub[];
-extern char Thumb2_putfield_a_stub[];
-extern char Thumb2_putfield_dw_stub[];
-extern char Thumb2_getstatic_word_stub[];
-extern char Thumb2_getstatic_sh_stub[];
-extern char Thumb2_getstatic_h_stub[];
-extern char Thumb2_getstatic_sb_stub[];
-extern char Thumb2_getstatic_dw_stub[];
-extern char Thumb2_putstatic_word_stub[];
-extern char Thumb2_putstatic_h_stub[];
-extern char Thumb2_putstatic_b_stub[];
-extern char Thumb2_putstatic_a_stub[];
-extern char Thumb2_putstatic_dw_stub[];
-
-extern char Thumb2_invokestaticresolved_stub[];
-extern char Thumb2_invokespecialresolved_stub[];
-extern char Thumb2_invokevirtualresolved_stub[];
-extern char Thumb2_invokevfinalresolved_stub[];
-
-#define STUBS_SIZE	(Thumb2_stubs_end-Thumb2_stubs)
-#define IDIV_STUB		(Thumb2_idiv_stub-Thumb2_stubs)
-#define IREM_STUB		(Thumb2_irem_stub-Thumb2_stubs)
-#define INVOKEINTERFACE_STUB	(Thumb2_invokeinterface_stub-Thumb2_stubs)
-#define INVOKEVIRTUAL_STUB	(Thumb2_invokevirtual_stub-Thumb2_stubs)
-#define INVOKESTATIC_STUB	(Thumb2_invokestatic_stub-Thumb2_stubs)
-#define INVOKESPECIAL_STUB	(Thumb2_invokespecial_stub-Thumb2_stubs)
-#define GETFIELD_WORD_STUB	(Thumb2_getfield_word_stub-Thumb2_stubs)
-#define GETFIELD_SH_STUB	(Thumb2_getfield_sh_stub-Thumb2_stubs)
-#define GETFIELD_H_STUB		(Thumb2_getfield_h_stub-Thumb2_stubs)
-#define GETFIELD_SB_STUB	(Thumb2_getfield_sb_stub-Thumb2_stubs)
-#define GETFIELD_DW_STUB	(Thumb2_getfield_dw_stub-Thumb2_stubs)
-#define PUTFIELD_WORD_STUB	(Thumb2_putfield_word_stub-Thumb2_stubs)
-#define PUTFIELD_H_STUB		(Thumb2_putfield_h_stub-Thumb2_stubs)
-#define PUTFIELD_B_STUB		(Thumb2_putfield_b_stub-Thumb2_stubs)
-#define PUTFIELD_A_STUB		(Thumb2_putfield_a_stub-Thumb2_stubs)
-#define PUTFIELD_DW_STUB	(Thumb2_putfield_dw_stub-Thumb2_stubs)
-#define GETSTATIC_WORD_STUB	(Thumb2_getstatic_word_stub-Thumb2_stubs)
-#define GETSTATIC_SH_STUB	(Thumb2_getstatic_sh_stub-Thumb2_stubs)
-#define GETSTATIC_H_STUB	(Thumb2_getstatic_h_stub-Thumb2_stubs)
-#define GETSTATIC_SB_STUB	(Thumb2_getstatic_sb_stub-Thumb2_stubs)
-#define GETSTATIC_DW_STUB	(Thumb2_getstatic_dw_stub-Thumb2_stubs)
-#define PUTSTATIC_WORD_STUB	(Thumb2_putstatic_word_stub-Thumb2_stubs)
-#define PUTSTATIC_H_STUB	(Thumb2_putstatic_h_stub-Thumb2_stubs)
-#define PUTSTATIC_B_STUB	(Thumb2_putstatic_b_stub-Thumb2_stubs)
-#define PUTSTATIC_A_STUB	(Thumb2_putstatic_a_stub-Thumb2_stubs)
-#define PUTSTATIC_DW_STUB	(Thumb2_putstatic_dw_stub-Thumb2_stubs)
-
-#define INVOKESTATIC_RESOLVED_STUB (Thumb2_invokestaticresolved_stub-Thumb2_stubs)
-#define INVOKESPECIAL_RESOLVED_STUB (Thumb2_invokespecialresolved_stub-Thumb2_stubs)
-#define INVOKEVIRTUAL_RESOLVED_STUB (Thumb2_invokevirtualresolved_stub-Thumb2_stubs)
-#define INVOKEVFINAL_RESOLVED_STUB (Thumb2_invokevfinalresolved_stub-Thumb2_stubs)
-
-extern "C" void Thumb2_NullPtr_Handler(void);
-
-
-extern "C" int Thumb2_Check_Null(unsigned *regs, unsigned pc)
-{
-  Thumb2_CodeBuf *cb = thumb2_codebuf;
-  if (!(CPUInfo & ARCH_THUMB2)) return 0;
-  if (IS_COMPILED(pc, cb)) {
-    regs[ARM_LR] = pc;
-    regs[ARM_PC] = (unsigned)Thumb2_NullPtr_Handler;
-    regs[ARM_CPSR] &= ~CPSR_THUMB_BIT;
-    return 1;
-  }
-  return 0;
-}
-
-extern "C" void Thumb2_Initialize(void)
-{
-  CodeBuf codebuf;
-  Thumb2_CodeBuf *cb;
-  u32 h_divzero;
-  u32 loc_irem, loc_idiv, loc_ldiv;
-  int rc;
-
-  if (!(CPUInfo & ARCH_THUMB2)) {
-    UseCompiler = false;
-    return;
-  }
-
-#ifdef T2_PRINT_COMPILATION
-  PrintCompilation |= getenv("T2_PRINT_COMPILATION") != NULL;
-#endif
-#ifdef T2_PRINT_STATISTICS
-  t2_print_statistics = getenv("T2_PRINT_STATISTICS");
-#endif
-#ifdef T2_PRINT_DISASS
-  PrintAssembly |= getenv("T2_PRINT_DISASS") != NULL;
-#endif
-#ifdef T2_PRINT_REGUSAGE
-  t2_print_regusage = getenv("T2_PRINT_REGUSAGE");
-#endif
-
-  cb = (Thumb2_CodeBuf *)mmap(0, THUMB2_CODEBUF_SIZE, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
-  if (cb == MAP_FAILED) {
-    UseCompiler = false;
-    return;
-  }
-
-  cb->size = THUMB2_CODEBUF_SIZE;
-  cb->hp = (char *)cb + sizeof(Thumb2_CodeBuf);
-  cb->sp = (char *)cb + THUMB2_CODEBUF_SIZE;
-
-  codebuf.codebuf = (unsigned short *)cb->hp;
-  codebuf.idx = 0;
-  codebuf.limit = (unsigned short *)cb->sp - (unsigned short *)cb->hp;
-
-  if (rc = setjmp(compiler_error_env)) {
-    UseCompiler = false;
-    return;
-  }
-
-#ifdef THUMB2_JVMTI
-  // cache the start of the generated stub region for notification later
-  stub_gen_code_start = cb->hp;
-#endif // THUMB2_JVMTI
-
-  memcpy(cb->hp, Thumb2_stubs, STUBS_SIZE);
-
-  // fprintf(stderr, "Thumb2_stubs offset: 0x%x\n",
-  // 	  (char*)(cb->hp) - (char*)Thumb2_stubs);
-
-  handlers[H_IDIV] = (unsigned)(cb->hp + IDIV_STUB);
-  handlers[H_IREM] = (unsigned)(cb->hp + IREM_STUB);
-handlers[H_INVOKEINTERFACE] = (unsigned)(cb->hp + INVOKEINTERFACE_STUB);
-  handlers[H_INVOKEVIRTUAL] = (unsigned)(cb->hp + INVOKEVIRTUAL_STUB);
-  handlers[H_INVOKESTATIC] = (unsigned)(cb->hp + INVOKESTATIC_STUB);
-  handlers[H_INVOKESPECIAL] = (unsigned)(cb->hp + INVOKESPECIAL_STUB);
-
-  handlers[H_GETFIELD_WORD] = (unsigned)(cb->hp + GETFIELD_WORD_STUB);
-  handlers[H_GETFIELD_SH] = (unsigned)(cb->hp + GETFIELD_SH_STUB);
-  handlers[H_GETFIELD_H] = (unsigned)(cb->hp + GETFIELD_H_STUB);
-  handlers[H_GETFIELD_SB] = (unsigned)(cb->hp + GETFIELD_SB_STUB);
-  handlers[H_GETFIELD_DW] = (unsigned)(cb->hp + GETFIELD_DW_STUB);
-
-  handlers[H_INVOKESTATIC_RESOLVED] = (unsigned)(cb->hp + INVOKESTATIC_RESOLVED_STUB);
-  handlers[H_INVOKEVIRTUAL_RESOLVED] = (unsigned)(cb->hp + INVOKESPECIAL_RESOLVED_STUB);
-  handlers[H_INVOKEVIRTUAL_RESOLVED] = (unsigned)(cb->hp + INVOKEVIRTUAL_RESOLVED_STUB);
-  handlers[H_INVOKEVFINAL] = (unsigned)(cb->hp + INVOKEVFINAL_RESOLVED_STUB);
-
-  handlers[H_PUTFIELD_WORD] = (unsigned)(cb->hp + PUTFIELD_WORD_STUB);
-  handlers[H_PUTFIELD_H] = (unsigned)(cb->hp + PUTFIELD_H_STUB);
-  handlers[H_PUTFIELD_B] = (unsigned)(cb->hp + PUTFIELD_B_STUB);
-  handlers[H_PUTFIELD_A] = (unsigned)(cb->hp + PUTFIELD_A_STUB);
-  handlers[H_PUTFIELD_DW] = (unsigned)(cb->hp + PUTFIELD_DW_STUB);
-
-  handlers[H_GETSTATIC_WORD] = (unsigned)(cb->hp + GETSTATIC_WORD_STUB);
-  handlers[H_GETSTATIC_SH] = (unsigned)(cb->hp + GETSTATIC_SH_STUB);
-  handlers[H_GETSTATIC_H] = (unsigned)(cb->hp + GETSTATIC_H_STUB);
-  handlers[H_GETSTATIC_SB] = (unsigned)(cb->hp + GETSTATIC_SB_STUB);
-  handlers[H_GETSTATIC_DW] = (unsigned)(cb->hp + GETSTATIC_DW_STUB);
-
-  handlers[H_PUTSTATIC_WORD] = (unsigned)(cb->hp + PUTSTATIC_WORD_STUB);
-  handlers[H_PUTSTATIC_H] = (unsigned)(cb->hp + PUTSTATIC_H_STUB);
-  handlers[H_PUTSTATIC_B] = (unsigned)(cb->hp + PUTSTATIC_B_STUB);
-  handlers[H_PUTSTATIC_A] = (unsigned)(cb->hp + PUTSTATIC_A_STUB);
-  handlers[H_PUTSTATIC_DW] = (unsigned)(cb->hp + PUTSTATIC_DW_STUB);
-
-  codebuf.idx += (Thumb2_stubs_end-Thumb2_stubs) >> 1;
-
-  // Disassemble the codebuf we just created.  For debugging.  This
-  // first part is all ARM code; the part that we're about to create
-  // is Thumb code.
-  if (PrintAssembly) {
-    Hsdis hsdis;
-    hsdis.decode_instructions(cb->hp, cb->hp + codebuf.idx * 2,
-			      print_address, NULL, NULL, stderr,
-			      "");
-    fputc('\n', stderr);
-  }
-
-  char *begin_thumb_code = cb->hp + codebuf.idx * 2;
-
-  handlers[H_LDIV] = handlers[H_LREM] = out_pos(&codebuf);
-  dop_reg(&codebuf, DP_ORR, ARM_IP, ARM_R2, ARM_R3, 0, 0);
-  loc_ldiv = forward_16(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_ldivmod);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-  bcc_patch(&codebuf, COND_EQ, loc_ldiv);
-  mov_imm(&codebuf, ARM_IP, (u32)Thumb2_DivZero_Handler);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_ARRAYBOUND] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_ArrayBounds_Handler);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-  handlers[H_HANDLE_EXCEPTION] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-  handlers[H_HANDLE_EXCEPTION_NO_REGS] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception_NoRegs);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-  handlers[H_STACK_OVERFLOW] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Stack_Overflow);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-  handlers[H_DREM] = out_pos(&codebuf);
-  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_imm(&codebuf, ARM_IP, (u32)fmod);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_d_toVFP(&codebuf, VFP_D0, ARM_R0, ARM_R1);
-  vmov_reg_d_toVFP(&codebuf, VFP_D1, ARM_R2, ARM_R3);
-#endif
-  blx_reg(&codebuf, ARM_IP);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_d_toARM(&codebuf, ARM_R0, ARM_R1, VFP_D0);
-#endif
-  ldm(&codebuf, (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
-  handlers[H_FREM] = out_pos(&codebuf);
-  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_imm(&codebuf, ARM_R3, (u32)fmodf);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
-  vmov_reg_s_toVFP(&codebuf, VFP_S1, ARM_R1);
-#endif
-  blx_reg(&codebuf, ARM_R3);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_s_toARM(&codebuf, ARM_R0, VFP_S0);
-#endif
-  ldm(&codebuf, (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
-  handlers[H_I2F] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_i2f);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_I2D] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_i2d);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_L2F] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_l2f);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_L2D] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_l2d);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_F2I] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3f2iEf);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
-#endif
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_F2L] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3f2lEf);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_s_toVFP(&codebuf, VFP_S0, ARM_R0);
-#endif
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_F2D] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_f2d);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_D2I] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3d2iEd);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_d_toVFP(&codebuf, VFP_S0, ARM_R0, ARM_R1);
-#endif
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_D2L] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN13SharedRuntime3d2lEd);
-#ifdef __ARM_PCS_VFP
-  vmov_reg_d_toVFP(&codebuf, VFP_S0, ARM_R0, ARM_R1);
-#endif
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-  handlers[H_D2F] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_IP, (u32)__aeabi_d2f);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// NEW Stub
-//   r1 = index
-//   r3 = bci
-//   result -> R0, == 0 => exception
-  handlers[H_NEW] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
-  ldr_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_METHOD, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_new);
-  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R3);
-sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_BCP, 1, 0);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// NEWARRAY Stub
-//   r1 = atype
-//   r2 = tos
-//   r3 = bci
-//   result -> thread->vm_result
-  handlers[H_NEWARRAY] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R3, ARM_R0, ARM_R3);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// ANEWARRAY Stub
-//   r0 = bci
-//   r2 = index
-//   r3 = tos
-//   result -> thread->vm_result
-  handlers[H_ANEWARRAY] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
-  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
-
-  sub_imm(&codebuf, ARM_R1, Rstack, 4);
-  str_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
-
-  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTANTS, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// MULTIANEWARRAY Stub
-//   r0 = bci
-//   r1 = dimensions (*4)
-  handlers[H_MULTIANEWARRAY] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
-  add_reg(&codebuf, Rstack, Rstack, ARM_R1);
-  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi);
-  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  sub_imm(&codebuf, ARM_R1, Rstack, 4);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-// LDC Stub
-//   r0 = bci
-  handlers[H_LDC] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
-  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
-  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  mov_imm(&codebuf, ARM_R1, 0);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-// LDC_W Stub
-//   r0 = bci
-  handlers[H_LDC_W] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
-  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
-  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  mov_imm(&codebuf, ARM_R1, 1);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-// INSTANCEOF Stub
-//   r1 = index
-//   r2 = tos
-//   r3 = bci
-//   result -> R0, == -1 => exception
-  handlers[H_INSTANCEOF] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
-  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
-  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
-  sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_instanceof);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// CHECKCAST Stub
-//   r1 = index
-//   r2 = tos
-//   r3 = bci
-//   result -> R0, != 0 => exception
-  handlers[H_CHECKCAST] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
-  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
-  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
-  sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_checkcast);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// MONITORENTER
-//   r0 = bci
-  handlers[H_MONITORENTER] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_monitorenter);
-  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-// MONITOREXIT Stub
-//   r1 = tos
-//   r3 = bci
-//   result -> R0, != 0 => exception
-  handlers[H_MONITOREXIT] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
-  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
-  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
-  sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_monitorexit);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// AASTORE Stub
-//   r0 = bci
-//   r1 = value
-//   r2 = index
-//   r3 = arrayref
-  handlers[H_AASTORE] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
-  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_BCP, 1, 0);
-  sub_imm(&codebuf, ARM_IP, Rstack, 4);
-  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_STACK, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_aastore);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-// APUTFIELD Stub
-//   r0 = obj
-  handlers[H_APUTFIELD] = out_pos(&codebuf);
-  mov_imm(&codebuf, ARM_R3, (u32)Helper_aputfield);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-// SYNCHRONIZED_ENTER Stub
-//   r0 = bci
-//   Rstack = monitor
-  handlers[H_SYNCHRONIZED_ENTER] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R0);
-  str_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
-
-  sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, ARM_R1, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
-
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_enter);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  mov_reg(&codebuf, ARM_R1, Rstack);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-//
-// SYNCHRONIZED_EXIT Stub
-//   r0 = bci
-//   r1 = monitor
-  handlers[H_SYNCHRONIZED_EXIT] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-
-  ldr_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-  sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
-  str_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_exit);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  mov_reg(&codebuf, ARM_PC, ARM_IP);
-
-#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
-
-// DEBUG_METHDENTRY
-  handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
-  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(&codebuf, ARM_R2, ARM_R0);
-  mov_reg(&codebuf, ARM_R0, ARM_R8);
-  mov_reg(&codebuf, ARM_R1, ARM_R4);
-  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodEntry);
-  blx_reg(&codebuf, ARM_IP);
-  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
-// DEBUG_METHODEXIT
-  handlers[H_DEBUG_METHODEXIT] = out_pos(&codebuf);
-  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(&codebuf, ARM_R0, ARM_R8);
-  mov_reg(&codebuf, ARM_R1, ARM_R4);
-  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodExit);
-  blx_reg(&codebuf, ARM_IP);
-  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
-// DEBUG_METHODCALL
-  handlers[H_DEBUG_METHODCALL] = out_pos(&codebuf);
-  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(&codebuf, ARM_R2, ARM_R0);
-  mov_reg(&codebuf, ARM_R0, ARM_R8);
-  mov_reg(&codebuf, ARM_R1, ARM_R4);
-  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodCall);
-  blx_reg(&codebuf, ARM_IP);
-  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
-// EXIT_TO_INTERPRETER
-//   r0 = bci
-  handlers[H_EXIT_TO_INTERPRETER] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Exit_To_Interpreter);
-  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-// H_SAFEPOINT
-  handlers[H_SAFEPOINT] = out_pos(&codebuf);
-  stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-
-  // The frame walking code used by the garbage collector
-  // (frame::interpreter_frame_tos_address()) assumes that the stack
-  // pointer points one word below the top item on the stack, so we
-  // have to adjust the SP saved in istate accordingly.  If we don't,
-  // the value on TOS won't be seen by the GC and we will crash later.
-  sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK, 1, 0);
-
-  // Set up BytecodeInterpreter->_bcp for the GC
-  // bci+CONSTMETHOD_CODEOFFSET is passed in ARM_R1
-  // istate is passed in ARM_R2
-  ldr_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
-  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_BCP, 1, 0);
-
-  mov_imm(&codebuf, ARM_IP, (u32)Helper_SafePoint);
-  mov_reg(&codebuf, ARM_R0, Rthread);
-  blx_reg(&codebuf, ARM_IP);
-  ldm(&codebuf, (1<<ARM_LR), ARM_SP, POP_FD, 1);
-  cmp_imm(&codebuf, ARM_R0, 0);
-
-  // The sequence here is delicate.  We need to seet things up so that
-  // it looks as though Thumb2_Handle_Exception_NoRegs was called
-  // directly from a compiled method.
-  it(&codebuf, COND_EQ, IT_MASK_T);
-  mov_reg(&codebuf, ARM_PC, ARM_LR);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Handle_Exception_NoRegs);
-  mov_reg(&codebuf, ARM_PC, ARM_R3);
-
-  // Disassemble the codebuf we just created.  For debugging
-  if (PrintAssembly) {
-    Hsdis hsdis;
-    hsdis.decode_instructions(begin_thumb_code, cb->hp + codebuf.idx * 2,
-			      print_address, NULL, NULL, stderr,
-			      "force-thumb");
-    fputc('\n', stderr);
-  }
-
-  Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf.idx * 2);
-  cb->hp += codebuf.idx * 2;
-
-  thumb2_codebuf = cb;
-
-#ifdef THUMB2_JVMTI
-  // cache the end of the generated stub region for notification later
-  stub_gen_code_end = cb->hp;
-#endif // THUMB2_JVMTI
-}
-
-#endif // T2JIT
-
-#endif // __arm__