changeset 8576:b2df86902f5e

Add support for large code cache
author enevill
date Wed, 09 Dec 2015 13:08:38 +0000
parents d74991e8f574
children 0096f1ef564e
files src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/assembler_aarch64.cpp src/cpu/aarch64/vm/assembler_aarch64.hpp src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp src/cpu/aarch64/vm/compiledIC_aarch64.cpp src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp src/cpu/aarch64/vm/globals_aarch64.hpp src/cpu/aarch64/vm/icBuffer_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.hpp src/cpu/aarch64/vm/methodHandles_aarch64.cpp src/cpu/aarch64/vm/nativeInst_aarch64.cpp src/cpu/aarch64/vm/nativeInst_aarch64.hpp src/cpu/aarch64/vm/relocInfo_aarch64.cpp src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp src/cpu/aarch64/vm/stubGenerator_aarch64.cpp src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp src/cpu/aarch64/vm/vtableStubs_aarch64.cpp src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp src/share/vm/runtime/arguments.cpp src/share/vm/utilities/globalDefinitions.hpp
diffstat 25 files changed, 464 insertions(+), 184 deletions(-)
--- a/src/cpu/aarch64/vm/aarch64.ad	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Dec 09 13:08:38 2015 +0000
@@ -952,13 +952,13 @@
   static int emit_deopt_handler(CodeBuffer& cbuf);
 
   static uint size_exception_handler() {
-    // count up to 4 movz/n/k instructions and one branch instruction
-    return 5 * NativeInstruction::instruction_size;
+    return MacroAssembler::far_branch_size();
   }
 
   static uint size_deopt_handler() {
-    // count one adr and one branch instruction
-    return 2 * NativeInstruction::instruction_size;
+    // count one adr and one far branch instruction
+    // return 4 * NativeInstruction::instruction_size;
+    return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
   }
 };
 
@@ -2845,16 +2845,18 @@
 
 int MachCallRuntimeNode::ret_addr_offset() {
   // for generated stubs the call will be
-  //   bl(addr)
+  //   far_call(addr)
   // for real runtime callouts it will be
   //   mov(rscratch1, RuntimeAddress(addr))
   //   blrt rscratch1
   CodeBlob *cb = CodeCache::find_blob(_entry_point);
   if (cb) {
-    return 4;
+    return MacroAssembler::far_branch_size();
   } else {
     // A 48-bit address.  See movptr().
-    return 16;
+    // then a blrt
+    // return 16;
+    return 4 * NativeInstruction::instruction_size;
   }
 }
 
@@ -3321,13 +3323,12 @@
   // This is the unverified entry point.
   MacroAssembler _masm(&cbuf);
 
-  // no need to worry about 4-byte of br alignment on AArch64
   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
   Label skip;
   // TODO
   // can we avoid this skip and still use a reloc?
   __ br(Assembler::EQ, skip);
-  __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
   __ bind(skip);
 }
 
@@ -3352,7 +3353,7 @@
   __ start_a_stub(size_exception_handler());
   if (base == NULL)  return 0;  // CodeBuffer::expand failed
   int offset = __ offset();
-  __ b(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
   __ end_a_stub();
   return offset;
@@ -3370,8 +3371,7 @@
   int offset = __ offset();
 
   __ adr(lr, __ pc());
-  // should we load this into rscratch1 and use a br?
-  __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 
   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
   __ end_a_stub();
@@ -4695,11 +4695,11 @@
     address addr = (address)$meth$$method;
     if (!_method) {
       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
-      __ bl(Address(addr, relocInfo::runtime_call_type));
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
     } else if (_optimized_virtual) {
-      __ bl(Address(addr, relocInfo::opt_virtual_call_type));
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
     } else {
-      __ bl(Address(addr, relocInfo::static_call_type));
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
     }
 
     if (_method) {
@@ -4712,22 +4712,19 @@
     MacroAssembler _masm(&cbuf);
     relocInfo::relocType reloc;
 
-    // TODO fixme
-    // this is supposed to preserve and restore SP around the call
-    // need to check it works
+    // RFP is preserved across all calls, even compiled calls.
+    // Use it to preserve SP.
     __ mov(rfp, sp);
 
     address mark = __ pc();
     address addr = (address)$meth$$method;
     if (!_method) {
-      // TODO check this
-      // think we are calling generated Java here not x86
       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
-      __ bl(Address(addr, relocInfo::runtime_call_type));
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
     } else if (_optimized_virtual) {
-      __ bl(Address(addr, relocInfo::opt_virtual_call_type));
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
     } else {
-      __ bl(Address(addr, relocInfo::static_call_type));
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
     }
 
     if (_method) {
@@ -4762,7 +4759,7 @@
     address entry = (address)$meth$$method;
     CodeBlob *cb = CodeCache::find_blob(entry);
     if (cb) {
-      __ bl(Address(entry));
+      __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
     } else {
       int gpcnt;
       int fpcnt;
@@ -4775,7 +4772,7 @@
 
   enc_class aarch64_enc_rethrow() %{
     MacroAssembler _masm(&cbuf);
-    __ b(RuntimeAddress(OptoRuntime::rethrow_stub()));
+    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
   %}
 
   enc_class aarch64_enc_ret() %{
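
The size bookkeeping above is the heart of this file's change: handler sizes now depend on whether the code cache fits in the direct-branch range. A minimal sketch of the resulting byte counts, assuming the 4-byte AArch64 instruction size and the far_branch_size() definition added to macroAssembler_aarch64.hpp below (sketch only, not code from the patch):

// Sketch only: byte counts implied by the patch.
static uint sketch_size_exception_handler(bool far_branches) {
  // far_jump is adrp; add; br (12 bytes) with a large code cache,
  // otherwise a single b (4 bytes).
  return far_branches ? 3 * 4 : 4;          // == MacroAssembler::far_branch_size()
}
static uint sketch_size_deopt_handler(bool far_branches) {
  // adr lr, . followed by the far branch to the deopt blob.
  return 4 + (far_branches ? 3 * 4 : 4);    // 8 bytes near, 16 bytes far
}
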
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -1365,7 +1365,6 @@
   if (L.is_bound()) {
     br(cc, target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     br(cc, pc());
   }
@@ -1376,7 +1375,6 @@
   if (L.is_bound()) {
     (this->*insn)(target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(pc());
   }
@@ -1387,7 +1385,6 @@
   if (L.is_bound()) {
     (this->*insn)(r, target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(r, pc());
   }
@@ -1398,7 +1395,6 @@
   if (L.is_bound()) {
     (this->*insn)(r, bitpos, target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(r, bitpos, pc());
   }
@@ -1408,7 +1404,6 @@
   if (L.is_bound()) {
     (this->*insn)(target(L), op);
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(pc(), op);
   }
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Dec 09 13:08:38 2015 +0000
@@ -849,16 +849,28 @@
 
 #undef INSN
 
+  // The maximum range of a branch is fixed for the AArch64
+  // architecture.  In debug mode we shrink it in order to test
+  // trampolines, but not so small that branches in the interpreter
+  // are out of range.
+  static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
+
+  static bool reachable_from_branch_at(address branch, address target) {
+    return uabs(target - branch) < branch_range;
+  }
+
   // Unconditional branch (immediate)
-#define INSN(NAME, opcode)					\
-  void NAME(address dest) {					\
-    starti;							\
-    long offset = (dest - pc()) >> 2;				\
-    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);	\
-  }								\
-  void NAME(Label &L) {						\
-    wrap_label(L, &Assembler::NAME);				\
-  }								\
+
+#define INSN(NAME, opcode)                                              \
+  void NAME(address dest) {                                             \
+    starti;                                                             \
+    long offset = (dest - pc()) >> 2;                                   \
+    DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
+    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
+  }                                                                     \
+  void NAME(Label &L) {                                                 \
+    wrap_label(L, &Assembler::NAME);                                    \
+  }                                                                     \
   void NAME(const Address &dest);
 
   INSN(b, 0);
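
The 128 MB figure in branch_range comes straight from the B/BL encoding: a signed 26-bit word offset reaches about ±2^25 instructions, i.e. ±128 MB. A minimal sketch of the same check expressed in terms of the raw immediate (assumption-only helper, not part of the patch):

// Sketch only: reachable_from_branch_at() expressed via the B/BL immediate.
static bool sketch_fits_in_b_bl(long byte_offset) {
  long word_offset = byte_offset >> 2;                            // instructions are 4 bytes
  return word_offset >= -(1L << 25) && word_offset < (1L << 25);  // signed imm26
}
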
--- a/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -115,7 +115,7 @@
   __ bind(_entry);
   ce->store_parameter(_method->as_register(), 1);
   ce->store_parameter(_bci, 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   __ b(_continuation);
@@ -134,7 +134,7 @@
   __ bind(_entry);
   if (_info->deoptimize_on_exception()) {
     address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
-    __ call(RuntimeAddress(a));
+    __ far_call(RuntimeAddress(a));
     ce->add_call_info_here(_info);
     ce->verify_oop_map(_info);
     debug_only(__ should_not_reach_here());
@@ -152,7 +152,7 @@
   } else {
     stub_id = Runtime1::throw_range_check_failed_id;
   }
-  __ call(RuntimeAddress(Runtime1::entry_for(stub_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id)), NULL, rscratch2);
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   debug_only(__ should_not_reach_here());
@@ -165,7 +165,7 @@
 void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
   address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
-  __ call(RuntimeAddress(a));
+  __ far_call(RuntimeAddress(a));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   debug_only(__ should_not_reach_here());
@@ -176,7 +176,7 @@
     ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
   }
   __ bind(_entry);
-  __ bl(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
+  __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
 #ifdef ASSERT
@@ -206,7 +206,7 @@
   assert(__ rsp_offset() == 0, "frame size should be fixed");
   __ bind(_entry);
   __ mov(r3, _klass_reg->as_register());
-  __ bl(RuntimeAddress(Runtime1::entry_for(_stub_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   assert(_result->as_register() == r0, "result must in r0,");
@@ -231,7 +231,7 @@
   __ bind(_entry);
   assert(_length->as_register() == r19, "length must in r19,");
   assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
-  __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   assert(_result->as_register() == r0, "result must in r0");
@@ -254,7 +254,7 @@
   __ bind(_entry);
   assert(_length->as_register() == r19, "length must in r19,");
   assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
-  __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   assert(_result->as_register() == r0, "result must in r0");
@@ -280,7 +280,7 @@
   } else {
     enter_id = Runtime1::monitorenter_nofpu_id;
   }
-  __ bl(RuntimeAddress(Runtime1::entry_for(enter_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   __ b(_continuation);
@@ -302,7 +302,7 @@
     exit_id = Runtime1::monitorexit_nofpu_id;
   }
   __ adr(lr, _continuation);
-  __ b(RuntimeAddress(Runtime1::entry_for(exit_id)));
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
 }
 
 
@@ -326,7 +326,7 @@
 
 void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
   ce->add_call_info_here(_info);
   DEBUG_ONLY(__ should_not_reach_here());
 }
@@ -343,7 +343,7 @@
 
   ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
   __ bind(_entry);
-  __ call(RuntimeAddress(a));
+  __ far_call(RuntimeAddress(a));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   debug_only(__ should_not_reach_here());
@@ -359,7 +359,7 @@
   if (_obj->is_cpu_register()) {
     __ mov(rscratch1, _obj->as_register());
   }
-  __ call(RuntimeAddress(Runtime1::entry_for(_stub)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, rscratch2);
   ce->add_call_info_here(_info);
   debug_only(__ should_not_reach_here());
 }
@@ -401,7 +401,7 @@
   ce->emit_static_call_stub();
   Address resolve(SharedRuntime::get_resolve_static_call_stub(),
 		  relocInfo::static_call_type);
-  __ bl(resolve);
+  __ trampoline_call(resolve);
   ce->add_call_info_here(info());
 
 #ifndef PRODUCT
@@ -432,7 +432,7 @@
   }
   __ cbz(pre_val_reg, _continuation);
   ce->store_parameter(pre_val()->as_register(), 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
   __ b(_continuation);
 }
 
@@ -453,7 +453,7 @@
   Register new_val_reg = new_val()->as_register();
   __ cbz(new_val_reg, _continuation);
   ce->store_parameter(addr()->as_pointer_register(), 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
   __ b(_continuation);
 }
 
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -297,7 +297,7 @@
   // Note: RECEIVER must still contain the receiver!
   Label dont;
   __ br(Assembler::EQ, dont);
-  __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
   // We align the verified entry point unless the method body
   // (including its inline cache check) will fit in a single 64-byte
@@ -344,7 +344,7 @@
   default: ShouldNotReachHere();
   }
 
-  __ bl(RuntimeAddress(target));
+  __ far_call(RuntimeAddress(target));
   add_call_info_here(info);
 }
 
@@ -390,8 +390,7 @@
   __ verify_not_null_oop(r0);
 
   // search an exception handler (r0: exception oop, r3: throwing pc)
-  __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
-  __ should_not_reach_here();
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
   guarantee(code_offset() - offset <= exception_handler_size, "overflow");
   __ end_a_stub();
 
@@ -446,7 +445,7 @@
   // remove the activation and dispatch to the unwind handler
   __ block_comment("remove_frame and dispatch to the unwind handler");
   __ remove_frame(initial_frame_size_in_bytes());
-  __ b(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
 
   // Emit the slow path assembly
   if (stub != NULL) {
@@ -476,7 +475,7 @@
   int offset = code_offset();
 
   __ adr(lr, pc());
-  __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
   guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
   __ end_a_stub();
 
@@ -954,7 +953,7 @@
   default: ShouldNotReachHere();
   }
 
-  __ bl(RuntimeAddress(target));
+  __ far_call(RuntimeAddress(target));
   add_call_info_here(info);
 }
 
@@ -1449,7 +1448,7 @@
         __ br(Assembler::EQ, *success_target);
 
 	__ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
-        __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+        __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
 	__ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize)));
         // result is a boolean
 	__ cbzw(klass_RInfo, *failure_target);
@@ -1460,7 +1459,7 @@
       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
       // call out-of-line instance of __ check_klass_subtype_slow_path(...):
       __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
-      __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
       __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
       // result is a boolean
       __ cbz(k_RInfo, *failure_target);
@@ -1550,7 +1549,7 @@
     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
     __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
-    __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
     __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
     // result is a boolean
     __ cbzw(k_RInfo, *failure_target);
@@ -2041,7 +2040,7 @@
 
 
 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
-  __ bl(Address(op->addr(), rtype));
+  __ trampoline_call(Address(op->addr(), rtype));
   add_call_info(code_offset(), op->info());
 }
 
@@ -2070,7 +2069,8 @@
 
   __ relocate(static_stub_Relocation::spec(call_pc));
   __ mov_metadata(rmethod, (Metadata*)NULL);
-  __ b(__ pc());
+  __ movptr(rscratch1, 0);
+  __ br(rscratch1);
 
   assert(__ offset() - start <= call_stub_size, "stub too big");
   __ end_a_stub();
@@ -2100,7 +2100,7 @@
   } else {
     unwind_id = Runtime1::handle_exception_nofpu_id;
   }
-  __ bl(RuntimeAddress(Runtime1::entry_for(unwind_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
 
   // FIXME: enough room for two byte trap   ????
   __ nop();
@@ -2263,7 +2263,7 @@
         __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
       }
 #endif
-      __ bl(RuntimeAddress(copyfunc_addr));
+      __ far_call(RuntimeAddress(copyfunc_addr));
     }
 
     __ cbz(r0, *stub->continuation());
@@ -2376,7 +2376,7 @@
       __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
 
       __ PUSH(src, dst);
-      __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
       __ POP(src, dst);
 
       __ cbnz(src, cont);
@@ -2426,7 +2426,7 @@
         __ load_klass(c_rarg4, dst);
         __ ldr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset()));
         __ ldrw(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
-        __ call(RuntimeAddress(copyfunc_addr));
+        __ far_call(RuntimeAddress(copyfunc_addr));
 
 #ifndef PRODUCT
         if (PrintC1Statistics) {
@@ -2541,7 +2541,7 @@
 
  CodeBlob *cb = CodeCache::find_blob(entry);
  if (cb) {
-   __ bl(RuntimeAddress(entry));
+   __ far_call(RuntimeAddress(entry));
  } else {
    __ call_VM_leaf(entry, 3);
  }
@@ -2878,7 +2878,7 @@
 
   CodeBlob *cb = CodeCache::find_blob(dest);
   if (cb) {
-    __ bl(RuntimeAddress(dest));
+    __ far_call(RuntimeAddress(dest));
   } else {
     __ mov(rscratch1, RuntimeAddress(dest));
     int len = args->length();
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp	Wed Dec 09 13:08:38 2015 +0000
@@ -72,9 +72,9 @@
   void store_parameter(jint c,     int offset_from_esp_in_words);
   void store_parameter(jobject c,  int offset_from_esp_in_words);
 
-  enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28),
-         exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
-         deopt_handler_size = NOT_LP64(10) LP64_ONLY(17)
-       };
+enum { call_stub_size = 12 * NativeInstruction::instruction_size,
+       exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
+       deopt_handler_size = 7 * NativeInstruction::instruction_size };
+
 
 #endif // CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -351,7 +351,7 @@
 
   if (CURRENT_ENV->dtrace_alloc_probes()) {
     assert(obj == r0, "must be");
-    call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
   }
 
   verify_oop(obj);
@@ -385,7 +385,7 @@
 
   if (CURRENT_ENV->dtrace_alloc_probes()) {
     assert(obj == r0, "must be");
-    bl(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
   }
 
   verify_oop(obj);
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -97,11 +97,11 @@
     }
     if (frame_size() == no_frame_size) {
       leave();
-      b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+      far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
     } else if (_stub_id == Runtime1::forward_exception_id) {
       should_not_reach_here();
     } else {
-      b(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+      far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
     }
     bind(L);
   }
@@ -324,7 +324,7 @@
 
 
 // target: the entry point of the method that creates and posts the exception oop
-// has_argument: true if the exception needs an argument (passed on stack because registers must be preserved)
+// has_argument: true if the exception needs an argument (passed in rscratch1)
 
 OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
   // make a frame and preserve the caller's caller-save registers
@@ -580,7 +580,7 @@
 
     { Label L1;
       __ cbnz(r0, L1);                                  // have we deoptimized?
-      __ b(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+      __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
       __ bind(L1);
     }
 
@@ -624,7 +624,7 @@
     // registers and must leave throwing pc on the stack.  A patch may
     // have values live in registers so the entry point with the
     // exception in tls.
-    __ b(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
+    __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
 
     __ bind(L);
   }
@@ -641,7 +641,7 @@
   // registers, pop all of our frame but the return address and jump to the deopt blob
   restore_live_registers(sasm);
   __ leave();
-  __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+  __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
 
   __ bind(cont);
   restore_live_registers(sasm);
@@ -1095,7 +1095,7 @@
         DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
         assert(deopt_blob != NULL, "deoptimization blob must have been created");
         __ leave();
-        __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+        __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
       }
       break;
 
@@ -1304,7 +1304,7 @@
         DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
         assert(deopt_blob != NULL, "deoptimization blob must have been created");
 
-        __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+        __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
       }
       break;
 
--- a/src/cpu/aarch64/vm/compiledIC_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -70,7 +70,8 @@
   __ relocate(static_stub_Relocation::spec(mark));
   // static stub relocation also tags the Method* in the code-stream.
   __ mov_metadata(rmethod, (Metadata*)NULL);
-  __ b(__ pc());
+  __ movptr(rscratch1, 0);
+  __ br(rscratch1);
 
   assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big");
   __ end_a_stub();
@@ -78,8 +79,7 @@
 #undef __
 
 int CompiledStaticCall::to_interp_stub_size() {
-  // count a mov mem --> to 3 movz/k and a branch
-  return 4 * NativeInstruction::instruction_size;
+  return 7 * NativeInstruction::instruction_size;
 }
 
 // Relocation entries for call stub, compiled java to interpreter.
--- a/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Wed Dec 09 13:08:38 2015 +0000
@@ -37,4 +37,8 @@
 
 #define SUPPORTS_NATIVE_CX8
 
+// The maximum B/BL offset range on AArch64 is 128MB.
+#undef CODE_CACHE_DEFAULT_LIMIT
+#define CODE_CACHE_DEFAULT_LIMIT (128*M)
+
 #endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Dec 09 13:08:38 2015 +0000
@@ -101,6 +101,7 @@
                                                                         \
   product(bool, UseCRC32, false,                                        \
           "Use CRC32 instructions for CRC32 computation")               \
+  product(bool, TraceTraps, false, "Trace all traps the signal handler")
 
 // Don't attempt to use Neon on builtin sim until builtin sim supports it
 #define UseNeon false
@@ -120,7 +121,8 @@
   product(bool, UseNeon, false,                                         \
           "Use Neon for CRC32 computation")                             \
   product(bool, UseCRC32, false,                                        \
-          "Use CRC32 instructions for CRC32 computation")
+          "Use CRC32 instructions for CRC32 computation")               \
+  product(bool, TraceTraps, false, "Trace all traps the signal handler")
 
 #endif
 
--- a/src/cpu/aarch64/vm/icBuffer_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/icBuffer_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -36,9 +36,10 @@
 #include "oops/oop.inline2.hpp"
 
 int InlineCacheBuffer::ic_stub_code_size() {
-  return NativeInstruction::instruction_size * 5;
+  return (MacroAssembler::far_branches() ? 6 : 4) * NativeInstruction::instruction_size;
 }
 
+#define __ masm->
 
 void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
   ResourceMark rm;
@@ -50,13 +51,15 @@
   // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
   // assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop");
 
+  address start = __ pc();
   Label l;
-  masm->ldr(rscratch2, l);
-  masm->b(ExternalAddress(entry_point));
-  masm->bind(l);
-  masm->emit_int64((int64_t)cached_value);
-  // Only need to invalidate the 1st two instructions - not the whole ic stub
-  ICache::invalidate_range(code_begin, NativeInstruction::instruction_size * 2);
+
+  __ ldr(rscratch2, l);
+  __ far_jump(ExternalAddress(entry_point));
+  __ bind(l);
+  __ emit_int64((int64_t)cached_value);
+  ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
+  assert(__ pc() - start == ic_stub_code_size(), "must be");
 }
 
 address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
@@ -67,8 +70,8 @@
 
 
 void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
-  // creation also verifies the object
-  uintptr_t *p = (uintptr_t *)(code_begin + 8);
+  // The word containing the cached value is at the end of this IC buffer
+  uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize);
   void* o = (void*)*p;
   return o;
 }
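
The new ic_stub_code_size() counts the words of the stub laid out by assemble_ic_buffer_code(), which grows by two instructions when far_jump() has to expand. A sketch of the two layouts that size accounts for (illustration only):

// Sketch only: IC stub layouts behind (far_branches() ? 6 : 4) words.
//
//   small code cache (4 words):      large code cache (6 words):
//     ldr  rscratch2, L                ldr  rscratch2, L
//     b    entry_point                 adrp rscratch1, entry_point
//   L:                                 add  rscratch1, rscratch1, #:lo12:entry_point
//     .8byte cached_value              br   rscratch1
//                                    L:
//                                      .8byte cached_value
//
// In both cases the cached value is the last word of the stub, which is why
// ic_buffer_cached_value() now reads code_begin + ic_stub_code_size() - wordSize.
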
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -149,7 +149,7 @@
     Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
     Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
     Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
-    assert(pd_call_destination(branch) == target, "should be");
+    assert(target_addr_for_insn(branch) == target, "should be");
     instructions = 3;
   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
@@ -356,6 +356,42 @@
   }
 }
 
+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  if (far_branches()) {
+    unsigned long offset;
+    // We can use ADRP here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    adrp(tmp, entry, offset);
+    add(tmp, tmp, offset);
+    if (cbuf) cbuf->set_insts_mark();
+    blr(tmp);
+  } else {
+    if (cbuf) cbuf->set_insts_mark();
+    bl(entry);
+  }
+}
+
+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  if (far_branches()) {
+    unsigned long offset;
+    // We can use ADRP here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    adrp(tmp, entry, offset);
+    add(tmp, tmp, offset);
+    if (cbuf) cbuf->set_insts_mark();
+    br(tmp);
+  } else {
+    if (cbuf) cbuf->set_insts_mark();
+    b(entry);
+  }
+}
+
 int MacroAssembler::biased_locking_enter(Register lock_reg,
                                          Register obj_reg,
                                          Register swap_reg,
@@ -629,14 +665,74 @@
   call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
 }
 
-void MacroAssembler::call(Address entry) {
-  if (true // reachable(entry)
-      ) {
-    bl(entry);
+// Maybe emit a call via a trampoline.  If the code cache is small
+// trampolines won't be emitted.
+
+void MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
+  assert(entry.rspec().type() == relocInfo::runtime_call_type
+         || entry.rspec().type() == relocInfo::opt_virtual_call_type
+         || entry.rspec().type() == relocInfo::static_call_type
+         || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
+
+  unsigned int start_offset = offset();
+  if (far_branches() && !Compile::current()->in_scratch_emit_size()) {
+    emit_trampoline_stub(offset(), entry.target());
+  }
+
+  if (cbuf) cbuf->set_insts_mark();
+  relocate(entry.rspec());
+  if (Assembler::reachable_from_branch_at(pc(), entry.target())) {
+    bl(entry.target());
   } else {
-    lea(rscratch1, entry);
-    blr(rscratch1);
+    bl(pc());
+  }
+}
+
+
+// Emit a trampoline stub for a call to a target which is too far away.
+//
+// code sequences:
+//
+// call-site:
+//   branch-and-link to <destination> or <trampoline stub>
+//
+// Related trampoline stub for this call site in the stub section:
+//   load the call target from the constant pool
+//   branch (LR still points to the call site above)
+
+void MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
+                                             address dest) {
+  address stub = start_a_stub(Compile::MAX_stubs_size/2);
+  if (stub == NULL) {
+    start_a_stub(Compile::MAX_stubs_size/2);
+    Compile::current()->env()->record_out_of_memory_failure();
+    return;
   }
+
+  // Create a trampoline stub relocation which relates this trampoline stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+  align(wordSize);
+  relocate(trampoline_stub_Relocation::spec(code()->insts()->start()
+                                            + insts_call_instruction_offset));
+  const int stub_start_offset = offset();
+
+  // Now, create the trampoline stub's code:
+  // - load the call
+  // - call
+  Label target;
+  ldr(rscratch1, target);
+  br(rscratch1);
+  bind(target);
+  assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+         "should be");
+  emit_int64((int64_t)dest);
+
+  const address stub_start_addr = addr_at(stub_start_offset);
+
+  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+  end_a_stub();
 }
 
 void MacroAssembler::ic_call(address entry) {
@@ -645,7 +741,7 @@
   // unsigned long offset;
   // ldr_constant(rscratch2, const_ptr);
   movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
-  call(Address(entry, rh));
+  trampoline_call(Address(entry, rh));
 }
 
 // Implementation of call_VM versions
@@ -1293,8 +1389,7 @@
 // public methods
 
 void MacroAssembler::mov(Register r, Address dest) {
-  InstructionMark im(this);
-  code_section()->relocate(inst_mark(), dest.rspec());
+  code_section()->relocate(pc(), dest.rspec());
   u_int64_t imm64 = (u_int64_t)dest.target();
   movptr(r, imm64);
 }
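
To summarize the two new call shapes introduced above, here is a sketch of the emitted sequences, assuming the default tmp register rscratch1 (illustration only, not code from the patch):

// Sketch only: what the new helpers emit.
//
// far_call(RuntimeAddress(dest)):
//   small code cache:   bl   dest                          ; 4 bytes
//   large code cache:   adrp rscratch1, dest               ; 12 bytes total
//                       add  rscratch1, rscratch1, #:lo12:dest
//                       blr  rscratch1
//
// trampoline_call(Address(dest, reloc)):
//   always a single bl at the call site; with a large code cache an extra
//   stub is parked in the stub section:
//                       ldr  rscratch1, 1f
//                       br   rscratch1
//                   1:  .8byte dest
//   so later patching can re-point the bl either directly at a reachable
//   target or at the trampoline (see NativeCall::set_destination_mt_safe).
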
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Dec 09 13:08:38 2015 +0000
@@ -509,7 +509,11 @@
   static bool needs_explicit_null_check(intptr_t offset);
 
   static address target_addr_for_insn(address insn_addr, unsigned insn);
-
+  static address target_addr_for_insn(address insn_addr) {
+    unsigned insn = *(unsigned*)insn_addr;
+    return target_addr_for_insn(insn_addr, insn);
+  }
+  
   // Required platform-specific helpers for Label::patch_instructions.
   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
   static int pd_patch_instruction_size(address branch, address target);
@@ -517,8 +521,7 @@
     pd_patch_instruction_size(branch, target);
   }
   static address pd_call_destination(address branch) {
-    unsigned insn = *(unsigned*)branch;
-    return target_addr_for_insn(branch, insn);
+    return target_addr_for_insn(branch);
   }
 #ifndef PRODUCT
   static void pd_print_patched_instruction(address branch);
@@ -526,6 +529,8 @@
 
   static int patch_oop(address insn_addr, address o);
 
+  void emit_trampoline_stub(int insts_call_instruction_offset, address target);
+
   // The following 4 methods return the offset of the appropriate move instruction
 
   // Support for fast byte/short loading with zero extension (depending on particular CPU)
@@ -942,12 +947,24 @@
 
   // Calls
 
-  // void call(Label& L, relocInfo::relocType rtype);
+  void trampoline_call(Address entry, CodeBuffer *cbuf = NULL);
+
+  static bool far_branches() {
+    return ReservedCodeCacheSize > branch_range;
+  }
 
-  // NOTE: this call tranfers to the effective address of entry NOT
-  // the address contained by entry. This is because this is more natural
-  // for jumps/calls.
-  void call(Address entry);
+  // Jumps that can reach anywhere in the code cache.
+  // Trashes tmp.
+  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+
+  static int far_branch_size() {
+    if (far_branches()) {
+      return 3 * 4;  // adrp, add, br
+    } else {
+      return 4;
+    }
+  }
 
   // Emit the CompiledIC call idiom
   void ic_call(address entry);
--- a/src/cpu/aarch64/vm/methodHandles_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/methodHandles_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -115,7 +115,7 @@
   __ ldr(rscratch1,Address(method, entry_offset));
   __ br(rscratch1);
   __ bind(L_no_such_method);
-  __ b(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
+  __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
 }
 
 void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
@@ -418,7 +418,7 @@
     jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry);
     if (iid == vmIntrinsics::_linkToInterface) {
       __ bind(L_incompatible_class_change_error);
-      __ b(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+      __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
     }
   }
 }
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -40,7 +40,73 @@
 void NativeCall::verify() { ; }
 
 address NativeCall::destination() const {
-  return instruction_address() + displacement();
+  address addr = (address)this;
+  address destination = instruction_address() + displacement();
+
+  // Do we use a trampoline stub for this call?
+  CodeBlob* cb = CodeCache::find_blob_unsafe(addr);   // Else we get assertion if nmethod is zombie.
+  assert(cb && cb->is_nmethod(), "sanity");
+  nmethod *nm = (nmethod *)cb;
+  if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
+    // Yes we do, so get the destination from the trampoline stub.
+    const address trampoline_stub_addr = destination;
+    destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination();
+  }
+
+  return destination;
+}
+
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+//
+// Add parameter assert_lock to switch off assertion
+// during code generation, where no patching lock is needed.
+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
+  assert(!assert_lock ||
+         (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()),
+         "concurrent code patching");
+
+  ResourceMark rm;
+  int code_size = NativeInstruction::instruction_size;
+  address addr_call = addr_at(0);
+  assert(NativeCall::is_call_at(addr_call), "unexpected code at call site");
+
+  // Patch the constant in the call's trampoline stub.
+  address trampoline_stub_addr = get_trampoline();
+  if (trampoline_stub_addr != NULL) {
+    assert (! is_NativeCallTrampolineStub_at(dest), "chained trampolines");
+    nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest);
+  }
+
+  // Patch the call.
+  if (Assembler::reachable_from_branch_at(addr_call, dest)) {
+    set_destination(dest);
+  } else {
+    assert (trampoline_stub_addr != NULL, "we need a trampoline");
+    set_destination(trampoline_stub_addr);
+  }
+
+  ICache::invalidate_range(addr_call, instruction_size);
+}
+
+address NativeCall::get_trampoline() {
+  address call_addr = addr_at(0);
+
+  CodeBlob *code = CodeCache::find_blob(call_addr);
+  assert(code != NULL, "Could not find the containing code blob");
+
+  address bl_destination
+    = MacroAssembler::pd_call_destination(call_addr);
+  if (code->content_contains(bl_destination) &&
+      is_NativeCallTrampolineStub_at(bl_destination))
+    return bl_destination;
+
+  // If the codeBlob is not a nmethod, this is because we get here from the
+  // CodeBlob constructor, which is called within the nmethod constructor.
+  return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
 }
 
 // Inserts a native call instruction at a given pc
@@ -55,7 +121,7 @@
 
 intptr_t NativeMovConstReg::data() const {
   // das(uint64_t(instruction_address()),2);
-  address addr = MacroAssembler::pd_call_destination(instruction_address());
+  address addr = MacroAssembler::target_addr_for_insn(instruction_address());
   if (maybe_cpool_ref(instruction_address())) {
     return *(intptr_t*)addr;
   } else {
@@ -65,7 +131,7 @@
 
 void NativeMovConstReg::set_data(intptr_t x) {
   if (maybe_cpool_ref(instruction_address())) {
-    address addr = MacroAssembler::pd_call_destination(instruction_address());
+    address addr = MacroAssembler::target_addr_for_insn(instruction_address());
     *(intptr_t*)addr = x;
   } else {
     MacroAssembler::pd_patch_instruction(instruction_address(), (address)x);
@@ -86,10 +152,10 @@
   address pc = instruction_address();
   unsigned insn = *(unsigned*)pc;
   if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
-    address addr = MacroAssembler::pd_call_destination(pc);
+    address addr = MacroAssembler::target_addr_for_insn(pc);
     return *addr;
   } else {
-    return (int)(intptr_t)MacroAssembler::pd_call_destination(instruction_address());
+    return (int)(intptr_t)MacroAssembler::target_addr_for_insn(instruction_address());
   }
 }
 
@@ -97,7 +163,7 @@
   address pc = instruction_address();
   unsigned insn = *(unsigned*)pc;
   if (maybe_cpool_ref(pc)) {
-    address addr = MacroAssembler::pd_call_destination(pc);
+    address addr = MacroAssembler::target_addr_for_insn(pc);
     *(long*)addr = x;
   } else {
     MacroAssembler::pd_patch_instruction(pc, (address)intptr_t(x));
@@ -107,7 +173,7 @@
 
 void NativeMovRegMem::verify() {
 #ifdef ASSERT
-  address dest = MacroAssembler::pd_call_destination(instruction_address());
+  address dest = MacroAssembler::target_addr_for_insn(instruction_address());
 #endif
 }
 
@@ -121,7 +187,7 @@
 
 
 address NativeJump::jump_destination() const          {
-  address dest = MacroAssembler::pd_call_destination(instruction_address());
+  address dest = MacroAssembler::target_addr_for_insn(instruction_address());
 
   // We use jump to self as the unresolved address which the inline
   // cache code (and relocs) know about
@@ -192,19 +258,39 @@
   return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101;
 }
 
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
+  return uint_at(0) == 0xd4bbd5a1; // dcps1 #0xdead
+}
+
+void NativeIllegalInstruction::insert(address code_pos) {
+  *(juint*)code_pos = 0xd4bbd5a1; // dcps1 #0xdead
+}
+
 //-------------------------------------------------------------------
 
-// MT safe inserting of a jump over a jump or a nop (used by nmethod::makeZombie)
+// MT safe inserting of a jump over a jump or a nop (used by
+// nmethod::makeZombie)
 
 void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
-  ptrdiff_t disp = dest - verified_entry;
-  guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow");
+
+  assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
+  assert(nativeInstruction_at(verified_entry)->is_jump_or_nop()
+         || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(),
+         "Aarch64 cannot replace non-jump with jump");
 
-  unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff);
+  // Patch this nmethod atomically.
+  if (Assembler::reachable_from_branch_at(verified_entry, dest)) {
+    ptrdiff_t disp = dest - verified_entry;
+    guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow");
 
-  assert(nativeInstruction_at(verified_entry)->is_jump_or_nop(),
-	 "Aarch64 cannot replace non-jump with jump");
-  *(unsigned int*)verified_entry = insn;
+    unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff);
+    *(unsigned int*)verified_entry = insn;
+  } else {
+    // We use an illegal instruction for marking a method as
+    // not_entrant or zombie.
+    NativeIllegalInstruction::insert(verified_entry);
+  }
+
   ICache::invalidate_range(verified_entry, instruction_size);
 }
 
@@ -212,23 +298,28 @@
 
 void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
   NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos;
-  ptrdiff_t disp = entry - code_pos;
-  guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow");
+
+  CodeBuffer cb(code_pos, instruction_size);
+  MacroAssembler a(&cb);
 
-  unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff);
-  *(unsigned int*)code_pos = insn;
+  a.mov(rscratch1, entry);
+  a.br(rscratch1);
+
   ICache::invalidate_range(code_pos, instruction_size);
 }
 
 // MT-safe patching of a long jump instruction.
 void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
-  NativeGeneralJump* n_jump = (NativeGeneralJump*)instr_addr;
-  assert(n_jump->is_jump_or_nop(),
-	 "Aarch64 cannot replace non-jump with jump");
-  uint32_t instr = *(uint32_t*)code_buffer;
-  *(uint32_t*)instr_addr = instr;
-  ICache::invalidate_range(instr_addr, instruction_size);
+  ShouldNotCallThis();
 }
 
 bool NativeInstruction::is_dtrace_trap() { return false; }
 
+address NativeCallTrampolineStub::destination(nmethod *nm) const {
+  return ptr_at(data_offset);
+}
+
+void NativeCallTrampolineStub::set_destination(address new_destination) {
+  set_ptr_at(data_offset, new_destination);
+  OrderAccess::release();
+}
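
The patching protocol used by set_destination_mt_safe() above can be summarized as follows (sketch of the decision, not additional code in the patch):

// Sketch only: MT-safe re-binding of a compiled call.
//
//   if (Assembler::reachable_from_branch_at(call, dest))
//       rewrite the bl immediate so it branches straight to dest;
//   else
//       leave the bl pointing at the trampoline stub and rewrite the
//       64-bit destination word stored inside that stub.
//
// Each case updates a single naturally aligned word (a 32-bit bl or the
// 64-bit trampoline constant), so a free-running thread observes either
// the old or the new destination, never a torn intermediate state.
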
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Wed Dec 09 13:08:38 2015 +0000
@@ -53,6 +53,7 @@
 
 class NativeInstruction VALUE_OBJ_CLASS_SPEC {
   friend class Relocation;
+  friend bool is_NativeCallTrampolineStub_at(address);
  public:
   enum { instruction_size = 4 };
   inline bool is_nop();
@@ -66,6 +67,7 @@
   inline bool is_mov_literal64();
   bool is_movz();
   bool is_movk();
+  bool is_sigill_zombie_not_entrant();
 
  protected:
   address addr_at(int offset) const    { return address(this) + offset; }
@@ -73,16 +75,18 @@
   s_char sbyte_at(int offset) const    { return *(s_char*) addr_at(offset); }
   u_char ubyte_at(int offset) const    { return *(u_char*) addr_at(offset); }
 
-  jint int_at(int offset) const         { return *(jint*) addr_at(offset); }
+  jint int_at(int offset) const        { return *(jint*) addr_at(offset); }
+  juint uint_at(int offset) const      { return *(juint*) addr_at(offset); }
 
-  intptr_t ptr_at(int offset) const    { return *(intptr_t*) addr_at(offset); }
+  address ptr_at(int offset) const    { return *(address*) addr_at(offset); }
 
   oop  oop_at (int offset) const       { return *(oop*) addr_at(offset); }
 
 
   void set_char_at(int offset, char c)        { *addr_at(offset) = (u_char)c; }
   void set_int_at(int offset, jint  i)        { *(jint*)addr_at(offset) = i; }
-  void set_ptr_at (int offset, intptr_t  ptr) { *(intptr_t*) addr_at(offset) = ptr; }
+  void set_uint_at(int offset, jint  i)       { *(juint*)addr_at(offset) = i; }
+  void set_ptr_at (int offset, address  ptr)  { *(address*) addr_at(offset) = ptr; }
   void set_oop_at (int offset, oop  o)        { *(oop*) addr_at(offset) = o; }
 
  public:
@@ -138,22 +142,8 @@
     offset &= (1 << 26) - 1; // mask off insn part
     insn |= offset;
     set_int_at(displacement_offset, insn);
-    ICache::invalidate_range(instruction_address(), instruction_size);
   }
 
-  // Similar to replace_mt_safe, but just changes the destination.  The
-  // important thing is that free-running threads are able to execute
-  // this call instruction at all times.  If the call is an immediate BL
-  // instruction we can simply rely on atomicity of 32-bit writes to
-  // make sure other threads will see no intermediate states.
-
-  // We cannot rely on locks here, since the free-running threads must run at
-  // full speed.
-  //
-  // Used in the runtime linkage of calls; see class CompiledIC.
-  // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
-  void  set_destination_mt_safe(address dest) { set_destination(dest); }
-
   void  verify_alignment()                       { ; }
   void  verify();
   void  print();
@@ -175,6 +165,23 @@
   static void insert(address code_pos, address entry);
 
   static void replace_mt_safe(address instr_addr, address code_buffer);
+  
+  // Similar to replace_mt_safe, but just changes the destination.  The
+  // important thing is that free-running threads are able to execute
+  // this call instruction at all times.  If the call is an immediate BL
+  // instruction we can simply rely on atomicity of 32-bit writes to
+  // make sure other threads will see no intermediate states.
+
+  // We cannot rely on locks here, since the free-running threads must run at
+  // full speed.
+  //
+  // Used in the runtime linkage of calls; see class CompiledIC.
+  // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
+
+  // The parameter assert_lock disables the assertion during code generation.
+  void set_destination_mt_safe(address dest, bool assert_lock = true);
+
+  address get_trampoline();
 };
 
 inline NativeCall* nativeCall_at(address address) {
@@ -378,10 +385,10 @@
 class NativeGeneralJump: public NativeJump {
 public:
   enum AArch64_specific_constants {
-    instruction_size            =    4,
+    instruction_size            =    4 * 4,
     instruction_offset          =    0,
     data_offset                 =    0,
-    next_instruction_offset     =    4
+    next_instruction_offset     =    4 * 4
   };
   static void insert_unconditional(address code_pos, address entry);
   static void replace_mt_safe(address instr_addr, address code_buffer);
@@ -450,4 +457,34 @@
   return is_nop() || is_jump();
 }
 
+// Call trampoline stubs.
+class NativeCallTrampolineStub : public NativeInstruction {
+ public:
+
+  enum AArch64_specific_constants {
+    instruction_size            =    4 * 4,
+    instruction_offset          =    0,
+    data_offset                 =    2 * 4,
+    next_instruction_offset     =    4 * 4
+  };
+
+  address destination(nmethod *nm = NULL) const;
+  void set_destination(address new_destination);
+  ptrdiff_t destination_offset() const;
+};
+
+inline bool is_NativeCallTrampolineStub_at(address addr) {
+  // Ensure that the stub is exactly
+  //      ldr   xscratch1, L
+  //      br    xscratch1
+  // L:
+  uint32_t *i = (uint32_t *)addr;
+  return i[0] == 0x58000048 && i[1] == 0xd61f0100;
+}
+
+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) {
+  assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found");
+  return (NativeCallTrampolineStub*)addr;
+}
+
 #endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
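
For reference, the two magic words tested in is_NativeCallTrampolineStub_at() decode as follows (my decoding, matching the sequence emit_trampoline_stub() generates):

// Sketch only: decoding the trampoline signature.
//   0x58000048   ldr x8, #+8     ; x8 is rscratch1; the literal sits 8 bytes
//                                ; ahead, i.e. at data_offset == 2 * 4
//   0xd61f0100   br  x8
//   <data_offset>: .8byte destination
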
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -59,6 +59,13 @@
 }
 
 address Relocation::pd_call_destination(address orig_addr) {
+  assert(is_call(), "should be a call here");
+  if (NativeCall::is_call_at(addr())) {
+    address trampoline = nativeCall_at(addr())->get_trampoline();
+    if (trampoline) {
+      return nativeCallTrampolineStub_at(trampoline)->destination();
+    }
+  }
   if (orig_addr != NULL) {
     return MacroAssembler::pd_call_destination(orig_addr);
   }
@@ -67,7 +74,17 @@
 
 
 void Relocation::pd_set_call_destination(address x) {
+  assert(is_call(), "should be a call here");
+  if (NativeCall::is_call_at(addr())) {
+    address trampoline = nativeCall_at(addr())->get_trampoline();
+    if (trampoline) {
+      nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false);
+      return;
+    }
+  }
+  assert(addr() != x, "call instruction in an infinite loop");
   MacroAssembler::pd_patch_instruction(addr(), x);
+  assert(pd_call_destination(addr()) == x, "fail in reloc");
 }
 
 address* Relocation::pd_address_in_code() {
@@ -80,17 +97,16 @@
 }
 
 void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
-  // fprintf(stderr, "Try to fix poll reloc at %p to %p\n", addr(), dest);
   if (NativeInstruction::maybe_cpool_ref(addr())) {
     address old_addr = old_addr_for(addr(), src, dest);
-    MacroAssembler::pd_patch_instruction(addr(), pd_call_destination(old_addr));
+    MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
   }
 }
 
 void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest)  {
   if (NativeInstruction::maybe_cpool_ref(addr())) {
     address old_addr = old_addr_for(addr(), src, dest);
-    MacroAssembler::pd_patch_instruction(addr(), pd_call_destination(old_addr));
+    MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
   }
 }
 
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -752,7 +752,7 @@
     __ cmp(rscratch1, tmp);
     __ ldr(rmethod, Address(holder, CompiledICHolder::holder_method_offset()));
     __ br(Assembler::EQ, ok);
-    __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
     __ bind(ok);
     // Method might have been compiled since the call site was patched to
@@ -760,7 +760,7 @@
     // the call site corrected.
     __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
     __ cbz(rscratch1, skip_fixup);
-    __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
     __ block_comment("} c2i_unverified_entry");
   }
 
@@ -1178,7 +1178,7 @@
 static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int type) {
   CodeBlob *cb = CodeCache::find_blob(dest);
   if (cb) {
-    __ bl(RuntimeAddress(dest));
+    __ far_call(RuntimeAddress(dest));
   } else {
     assert((unsigned)gpargs < 256, "eek!");
     assert((unsigned)fpargs < 32, "eek!");
@@ -1549,7 +1549,7 @@
   __ cmp_klass(receiver, ic_reg, rscratch1);
   __ br(Assembler::EQ, hit);
 
-  __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
   // Verified entry point must be aligned
   __ align(8);
@@ -2072,7 +2072,7 @@
     __ bind(exception_pending);
 
     // and forward the exception
-    __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
   }
 
   // Slow path locking & unlocking
@@ -2865,7 +2865,7 @@
 
   RegisterSaver::restore_live_registers(masm);
 
-  __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 
   // No exception case
   __ bind(noException);
@@ -2961,7 +2961,7 @@
   __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
 
   __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
-  __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 
   // -------------
   // make sure all code is generated
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -2495,7 +2495,7 @@
     __ should_not_reach_here();
     __ bind(L);
 #endif // ASSERT
-    __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 
 
     // codeBlob framesize is in words (not VMRegImpl::slot_size)
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -524,7 +524,7 @@
   // Note: the restored frame is not necessarily interpreted.
   // Use the shared runtime version of the StackOverflowError.
   assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
-  __ b(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
+  __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
 
   // all done with frame size check
   __ bind(after_frame_check);
--- a/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -180,7 +180,7 @@
   __ br(rscratch1);
 
   __ bind(throw_icce);
-  __ b(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+  __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
 
   __ flush();
 
--- a/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -375,7 +375,14 @@
       // Java thread running in Java code => find exception handler if any
       // a fault inside compiled code, the interpreter, or a stub
 
-      if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
+      // Handle signal from NativeJump::patch_verified_entry().
+      if ((sig == SIGILL || sig == SIGTRAP)
+          && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
+        if (TraceTraps) {
+          tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
+        }
+        stub = SharedRuntime::get_handle_wrong_method_stub();
+      } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
         stub = SharedRuntime::get_poll_stub(pc);
       } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
         // BugId 4454115: A read from a MappedByteBuffer can fault
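
The constant behind is_sigill_zombie_not_entrant() (see nativeInst_aarch64.cpp above) is worth spelling out; a sketch of the encoding as I read it:

// Sketch only: why 0xd4bbd5a1 raises SIGILL.
//   0xd4bbd5a1 encodes dcps1 #0xdead, a debug-state-only instruction that is
//   undefined in normal user-mode execution, so running a patched verified
//   entry point traps into the handler above, which then hands control to
//   the handle_wrong_method stub.
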
--- a/src/share/vm/runtime/arguments.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/share/vm/runtime/arguments.cpp	Wed Dec 09 13:08:38 2015 +0000
@@ -1137,9 +1137,8 @@
   }
   // Increase the code cache size - tiered compiles a lot more.
   if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) {
-    FLAG_SET_DEFAULT(ReservedCodeCacheSize, ReservedCodeCacheSize * 5);
-    // The maximum B/BL offset range on AArch64 is 128MB
-    AARCH64_ONLY(FLAG_SET_DEFAULT(ReservedCodeCacheSize, MIN2(ReservedCodeCacheSize, 128*M)));
+    FLAG_SET_DEFAULT(ReservedCodeCacheSize,
+                     MIN2(CODE_CACHE_DEFAULT_LIMIT, ReservedCodeCacheSize * 5));
   }
   if (!UseInterpreter) { // -Xcomp
     Tier3InvokeNotifyFreqLog = 0;
@@ -2476,11 +2475,11 @@
                 "Invalid ReservedCodeCacheSize=%dK. Must be at least %uK.\n", ReservedCodeCacheSize/K,
                 min_code_cache_size/K);
     status = false;
-  } else if (ReservedCodeCacheSize > 2*G) {
-    // Code cache size larger than MAXINT is not supported.
+  } else if (ReservedCodeCacheSize > CODE_CACHE_SIZE_LIMIT) {
+    // Code cache size larger than CODE_CACHE_SIZE_LIMIT is not supported.
     jio_fprintf(defaultStream::error_stream(),
                 "Invalid ReservedCodeCacheSize=%dM. Must be at most %uM.\n", ReservedCodeCacheSize/M,
-                (2*G)/M);
+                CODE_CACHE_SIZE_LIMIT/M);
     status = false;
   }
 
--- a/src/share/vm/utilities/globalDefinitions.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Wed Dec 09 13:08:38 2015 +0000
@@ -414,6 +414,11 @@
   ProfileRTM = 0x0  // Use RTM with abort ratio calculation
 };
 
+// The maximum size of the code cache.  Can be overridden by targets.
+#define CODE_CACHE_SIZE_LIMIT (2*G)
+// Allow targets to reduce the default size of the code cache.
+#define CODE_CACHE_DEFAULT_LIMIT CODE_CACHE_SIZE_LIMIT
+
 #ifdef TARGET_ARCH_x86
 # include "globalDefinitions_x86.hpp"
 #endif
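
A worked example of how these limits interact with the tiered-compilation scaling in arguments.cpp above, assuming the usual 48 MB platform default for ReservedCodeCacheSize (sketch only):

// Sketch only, with an assumed 48 MB default:
//   tiered scaling:  48 MB * 5                                    = 240 MB
//   most platforms:  MIN2(CODE_CACHE_DEFAULT_LIMIT = 2 GB,  240 MB) = 240 MB
//   AArch64:         MIN2(CODE_CACHE_DEFAULT_LIMIT = 128 MB, 240 MB) = 128 MB
// so the AArch64 default stays within B/BL range; only an explicit
// -XX:ReservedCodeCacheSize above 128 MB enables the far-branch paths.
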