changeset 8540:8fd636dd1c91

Merge
author asaha
date Wed, 10 Jun 2015 23:13:44 -0700
parents 0e5f64fa55c9 (current diff) 8b16790cd73a (diff)
children 06114526675f
files .hgtags make/hotspot_version src/share/vm/c1/c1_LIRGenerator.cpp
diffstat 54 files changed, 1190 insertions(+), 561 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Mon Jun 08 12:06:37 2015 -0700
+++ b/.hgtags	Wed Jun 10 23:13:44 2015 -0700
@@ -670,3 +670,5 @@
 c26d09f1065cd26bd8b926efc5d3938b71e09eb5 jdk8u60-b17
 624f4cc05e7e95dd2103f343c54d7bdea6a81919 hs25.60-b18
 3fa5c654c143fe309e5ddda92adc5fb132365bcf jdk8u60-b18
+b852350a2bc6d5f43006e2be53fb74d148290708 hs25.60-b19
+bd9221771f6e34e63b3b340ffcf9906ccf882dae jdk8u60-b19
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Mon Jun 08 12:06:37 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Wed Jun 10 23:13:44 2015 -0700
@@ -317,26 +317,17 @@
   //------------------------------------------------------------------------------
   // frame::adjust_unextended_sp
   private void adjustUnextendedSP() {
-    // If we are returning to a compiled MethodHandle call site, the
-    // saved_fp will in fact be a saved value of the unextended SP.  The
-    // simplest way to tell whether we are returning to such a call site
-    // is as follows:
+    // On x86, sites calling method handle intrinsics and lambda forms are treated
+    // as any other call site. Therefore, no special action is needed when we are
+    // returning to any of these call sites.
 
     CodeBlob cb = cb();
     NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
     if (senderNm != null) {
-      // If the sender PC is a deoptimization point, get the original
-      // PC.  For MethodHandle call site the unextended_sp is stored in
-      // saved_fp.
-      if (senderNm.isDeoptMhEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
-        raw_unextendedSP = getFP();
-      }
-      else if (senderNm.isDeoptEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
-      }
-      else if (senderNm.isMethodHandleReturn(getPC())) {
-        raw_unextendedSP = getFP();
+      // If the sender PC is a deoptimization point, get the original PC.
+      if (senderNm.isDeoptEntry(getPC()) ||
+          senderNm.isDeoptMhEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp));
       }
     }
   }
--- a/make/hotspot_version	Mon Jun 08 12:06:37 2015 -0700
+++ b/make/hotspot_version	Wed Jun 10 23:13:44 2015 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=60
-HS_BUILD_NUMBER=18
+HS_BUILD_NUMBER=19
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/linux/makefiles/dtrace.make	Mon Jun 08 12:06:37 2015 -0700
+++ b/make/linux/makefiles/dtrace.make	Wed Jun 10 23:13:44 2015 -0700
@@ -31,8 +31,8 @@
 REASON = "This JDK does not support SDT probes"
 else
 
-# We need a recent GCC for the default
-ifeq "$(shell expr \( $(CC_VER_MAJOR) \>= 4 \) \& \( $(CC_VER_MINOR) \>= 4 \) )" "0"
+# We need a recent GCC for the default (4.4 or later)
+ifeq "$(shell expr \( \( $(CC_VER_MAJOR) = 4 \) \& \( $(CC_VER_MINOR) \>= 4 \) \) \| \( $(CC_VER_MAJOR) \>= 5 \) )" "0"
 REASON = "gcc version is too old"
 else
 
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -55,6 +55,8 @@
 
 define_pd_global(bool, UseMembar,             false);
 
+define_pd_global(bool, PreserveFramePointer,  false);
+
 // GC Ergo Flags
 define_pd_global(uintx, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
 
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -74,6 +74,8 @@
 
 define_pd_global(bool, UseMembar,            false);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 // GC Ergo Flags
 define_pd_global(uintx, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
 
--- a/src/cpu/x86/vm/assembler_x86.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -141,8 +141,10 @@
 
 #endif // _LP64
 
-// JSR 292 fixed register usages:
-REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
+// JSR 292
+// On x86, the SP does not have to be saved when invoking method handle intrinsics
+// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
+REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
 
 // Address is an abstraction used to represent a memory location
 // using any of the amd64 addressing modes with one object.
--- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -343,14 +343,13 @@
   return FrameMap::rsp_opr;
 }
 
-
 // JSR 292
+// On x86, there is no need to save the SP, because neither
+// method handle intrinsics, nor compiled lambda forms modify it.
 LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
-  assert(rbp == rbp_mh_SP_save, "must be same register");
-  return rbp_opr;
+  return LIR_OprFact::illegalOpr;
 }
 
-
 bool FrameMap::validate_frame() {
   return true;
 }
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -359,6 +359,9 @@
   generate_stack_overflow_check(bang_size_in_bytes);
 
   push(rbp);
+  if (PreserveFramePointer) {
+    mov(rbp, rsp);
+  }
 #ifdef TIERED
   // c2 leaves fpu stack dirty. Clean it on entry
   if (UseSSE < 2 ) {
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -754,14 +754,9 @@
     // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
     // since we do a leave anyway.
 
-    // Pop the return address since we are possibly changing SP (restoring from BP).
+    // Pop the return address.
     __ leave();
     __ pop(rcx);
-
-    // Restore SP from BP if the exception PC is a method handle call site.
-    NOT_LP64(__ get_thread(thread);)
-    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
     __ jmp(rcx);  // jump to exception handler
     break;
   default:  ShouldNotReachHere();
@@ -832,11 +827,6 @@
   // the pop is also necessary to simulate the effect of a ret(0)
   __ pop(exception_pc);
 
-  // Restore SP from BP if the exception PC is a method handle call site.
-  NOT_LP64(__ get_thread(thread);)
-  __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // continue at exception handler (return address removed)
   // note: do *not* remove arguments when unwinding the
   //       activation since the caller assumes having
--- a/src/cpu/x86/vm/frame_x86.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -216,7 +216,8 @@
     if (sender_blob->is_nmethod()) {
         nmethod* nm = sender_blob->as_nmethod_or_null();
         if (nm != NULL) {
-            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+                nm->method()->is_method_handle_intrinsic()) {
                 return false;
             }
         }
@@ -383,10 +384,9 @@
 // frame::verify_deopt_original_pc
 //
 // Verifies the calculated original PC of a deoptimization PC for the
-// given unextended SP.  The unextended SP might also be the saved SP
-// for MethodHandle call sites.
+// given unextended SP.
 #ifdef ASSERT
-void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
   frame fr;
 
   // This is ugly but it's better than to change {get,set}_original_pc
@@ -396,33 +396,23 @@
 
   address original_pc = nm->get_original_pc(&fr);
   assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
-  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
 }
 #endif
 
 //------------------------------------------------------------------------------
 // frame::adjust_unextended_sp
 void frame::adjust_unextended_sp() {
-  // If we are returning to a compiled MethodHandle call site, the
-  // saved_fp will in fact be a saved value of the unextended SP.  The
-  // simplest way to tell whether we are returning to such a call site
-  // is as follows:
+  // On x86, sites calling method handle intrinsics and lambda forms are treated
+  // as any other call site. Therefore, no special action is needed when we are
+  // returning to any of these call sites.
 
   nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
   if (sender_nm != NULL) {
-    // If the sender PC is a deoptimization point, get the original
-    // PC.  For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    if (sender_nm->is_deopt_mh_entry(_pc)) {
-      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
-      _unextended_sp = _fp;
-    }
-    else if (sender_nm->is_deopt_entry(_pc)) {
+    // If the sender PC is a deoptimization point, get the original PC.
+    if (sender_nm->is_deopt_entry(_pc) ||
+        sender_nm->is_deopt_mh_entry(_pc)) {
       DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
     }
-    else if (sender_nm->is_method_handle_return(_pc)) {
-      _unextended_sp = _fp;
-    }
   }
 }
 
--- a/src/cpu/x86/vm/frame_x86.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -76,11 +76,11 @@
 //    [locals and parameters   ]
 //                               <- sender sp
 
-// [1] When the c++ interpreter calls a new method it returns to the frame
+// [1] When the C++ interpreter calls a new method it returns to the frame
 //     manager which allocates a new frame on the stack. In that case there
 //     is no real callee of this newly allocated frame. The frame manager is
-//     aware of the  additional frame(s) and will pop them as nested calls
-//     complete. Howevers tTo make it look good in the debugger the frame
+//     aware of the additional frame(s) and will pop them as nested calls
+//     complete. However, to make it look good in the debugger the frame
 //     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
 //     with a fake interpreter_state* parameter to make it easy to debug
 //     nested calls.
@@ -88,7 +88,7 @@
 // Note that contrary to the layout for the assembly interpreter the
 // expression stack allocated for the C++ interpreter is full sized.
 // However this is not as bad as it seems as the interpreter frame_manager
-// will truncate the unused space on succesive method calls.
+// will truncate the unused space on successive method calls.
 //
 // ------------------------------ C++ interpreter ----------------------------------------
 
@@ -172,10 +172,7 @@
 
 #ifdef ASSERT
   // Used in frame::sender_for_{interpreter,compiled}_frame
-  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
-  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
-    verify_deopt_original_pc(nm, unextended_sp, true);
-  }
+  static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp);
 #endif
 
  public:
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -93,7 +93,7 @@
   // find_blob call. This is also why we can have no asserts on the validity
   // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
   // -> pd_last_frame should use a specialized version of pd_last_frame which could
-  // call a specilaized frame constructor instead of this one.
+  // call a specialized frame constructor instead of this one.
   // Then we could use the assert below. However this assert is of somewhat dubious
   // value.
   // assert(_pc != NULL, "no pc?");
--- a/src/cpu/x86/vm/globals_x86.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -82,6 +82,8 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
                                                                             \
   develop(bool, IEEEPrecision, true,                                        \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -6122,6 +6122,10 @@
     // We always push rbp, so that on return to interpreter rbp, will be
     // restored correctly and we can correct the stack.
     push(rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      mov(rbp, rsp);
+    }
     // Remove word for ebp
     framesize -= wordSize;
 
@@ -6136,6 +6140,11 @@
     // Save RBP register now.
     framesize -= wordSize;
     movptr(Address(rsp, framesize), rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      movptr(rbp, rsp);
+      addptr(rbp, framesize + wordSize);
+    }
   }
 
   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -373,7 +373,7 @@
     //  member_reg - MemberName that was the trailing argument
     //  temp1_recv_klass - klass of stacked receiver, if needed
     //  rsi/r13 - interpreter linkage (if interpreted)
-    //  rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled)
+    //  rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled)
 
     Label L_incompatible_class_change_error;
     switch (iid) {
--- a/src/cpu/x86/vm/runtime_x86_32.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -126,10 +126,6 @@
 
   // rax: exception handler for given <exception oop/exception pc>
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax, (could be deopt blob)
   // rdx - throwing pc, deopt blob will need it.
 
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -4017,8 +4017,8 @@
 
   // Save callee-saved registers.  See x86_64.ad.
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
-  // convention will save restore it in prolog/epilog) Other than that
+  // rbp is an implicitly saved callee saved register (i.e., the calling
+  // convention will save/restore it in the prolog/epilog). Other than that
   // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
@@ -4060,9 +4060,9 @@
 
   // Restore callee-saved registers
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
+  // rbp is an implicitly saved callee-saved register (i.e., the calling
   // convention will save restore it in prolog/epilog) Other than that
-  // there are no callee save registers no that adapter frames are gone.
+  // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
 
@@ -4071,10 +4071,6 @@
 
   // rax: exception handler
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax (could be deopt blob).
   __ mov(r8, rax);
 
--- a/src/cpu/x86/vm/x86.ad	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/x86.ad	Wed Jun 10 23:13:44 2015 -0700
@@ -912,21 +912,6 @@
 
 encode %{
 
-  enc_class preserve_SP %{
-    debug_only(int off0 = cbuf.insts_size());
-    MacroAssembler _masm(&cbuf);
-    // RBP is preserved across all calls, even compiled calls.
-    // Use it to preserve RSP in places where the callee might change the SP.
-    __ movptr(rbp_mh_SP_save, rsp);
-    debug_only(int off1 = cbuf.insts_size());
-    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
-  %}
-
-  enc_class restore_SP %{
-    MacroAssembler _masm(&cbuf);
-    __ movptr(rsp, rbp_mh_SP_save);
-  %}
-
   enc_class call_epilog %{
     if (VerifyStackAtCalls) {
       // Check that stack depth is unchanged: find majik cookie on stack
--- a/src/cpu/x86/vm/x86_32.ad	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Wed Jun 10 23:13:44 2015 -0700
@@ -123,50 +123,94 @@
 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 //
+// Class for no registers (empty set).
+reg_class no_reg();
+
 // Class for all registers
-reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+// Class for all registers (excluding EBP)
+reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
+// Dynamic register class that selects at runtime between register classes
+// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer). 
+// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
+reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class for general registers
-reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
-// Class for general registers which may be used for implicit null checks on win95
-// Also safe for use by tailjump. We don't want to allocate in rbp,
-reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
+reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
+// Class for general registers (excluding EBP).
+// This register class can be used for implicit null checks on win95.
+// It is also safe for use by tailjumps (we don't want to allocate in ebp).
+// Used also if the PreserveFramePointer flag is true.
+reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
+// Dynamic register class that selects between int_reg and int_reg_no_ebp.
+reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of "X" registers
 reg_class int_x_reg(EBX, ECX, EDX, EAX);
+
 // Class of registers that can appear in an address with no offset.
 // EBP and ESP require an extra instruction byte for zero offset.
 // Used in fast-unlock
 reg_class p_reg(EDX, EDI, ESI, EBX);
-// Class for general registers not including ECX
-reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
-// Class for general registers not including EAX
+
+// Class for general registers excluding ECX
+reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
+// Class for general registers excluding ECX (and EBP)
+reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
+// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
+reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
+
+// Class for general registers excluding EAX
 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
-// Class for general registers not including EAX or EBX.
-reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
+
+// Class for general registers excluding EAX and EBX.
+reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
+// Class for general registers excluding EAX and EBX (and EBP)
+reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
+// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
+reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of EAX (for multiply and divide operations)
 reg_class eax_reg(EAX);
+
 // Class of EBX (for atomic add)
 reg_class ebx_reg(EBX);
+
 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 reg_class ecx_reg(ECX);
+
 // Class of EDX (for multiply and divide operations)
 reg_class edx_reg(EDX);
+
 // Class of EDI (for synchronization)
 reg_class edi_reg(EDI);
+
 // Class of ESI (for synchronization)
 reg_class esi_reg(ESI);
-// Singleton class for interpreter's stack pointer
-reg_class ebp_reg(EBP);
+
 // Singleton class for stack pointer
 reg_class sp_reg(ESP);
+
 // Singleton class for instruction pointer
 // reg_class ip_reg(EIP);
+
 // Class of integer register pairs
-reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
+reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
+// Class of integer register pairs (excluding EBP and EDI);
+reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
+// Dynamic register class that selects between long_reg and long_reg_no_ebp.
+reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of integer register pairs that aligns with calling convention
 reg_class eadx_reg( EAX,EDX );
 reg_class ebcx_reg( ECX,EBX );
+
 // Not AX or DX, used in divides
-reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
+reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
+// Not AX or DX (and neither EBP), used in divides
+reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
+// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
+reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 
 // Floating point registers.  Notice FPR0 is not a choice.
 // FPR0 is not ever allocated; we use clever encodings to fake
@@ -240,18 +284,11 @@
   return size;
 }
 
-static int preserve_SP_size() {
-  return 2;  // op, rm(reg/reg)
-}
-
 // !!!!! Special hack to get all type of calls to specify the byte offset
 //       from the start of the call to the point where the return address
 //       will point.
 int MachCallStaticJavaNode::ret_addr_offset() {
-  int offset = 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
-  if (_method_handle_invoke)
-    offset += preserve_SP_size();
-  return offset;
+  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points  
 }
 
 int MachCallDynamicJavaNode::ret_addr_offset() {
@@ -285,15 +322,6 @@
 
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
-  current_offset += pre_call_resets_size();  // skip fldcw, if any
-  current_offset += preserve_SP_size();   // skip mov rbp, rsp
-  current_offset += 1;      // skip call opcode byte
-  return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
   current_offset += pre_call_resets_size();  // skip fldcw, if any
   current_offset += 5;      // skip MOV instruction
@@ -523,6 +551,10 @@
     st->print("# stack bang (%d bytes)", bangsize);
     st->print("\n\t");
     st->print("PUSH   EBP\t# Save EBP");
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
+    }
     if (framesize) {
       st->print("\n\t");
       st->print("SUB    ESP, #%d\t# Create frame",framesize);
@@ -532,6 +564,10 @@
     st->print("\n\t");
     framesize -= wordSize;
     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("MOV    EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
+    }
   }
 
   if (VerifyStackAtCalls) {
@@ -1488,7 +1524,7 @@
 }
 
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return EBP_REG_mask();
+  return NO_REG_mask();
 }
 
 // Returns true if the high 32 bits of the value is known to be zero.
@@ -3734,7 +3770,7 @@
 
 // On windows95, EBP is not safe to use for implicit null tests.
 operand eRegP_no_EBP() %{
-  constraint(ALLOC_IN_RC(int_reg_no_rbp));
+  constraint(ALLOC_IN_RC(int_reg_no_ebp));
   match(RegP);
   match(eAXRegP);
   match(eBXRegP);
@@ -3823,13 +3859,6 @@
   interface(REG_INTER);
 %}
 
-operand eBPRegP() %{
-  constraint(ALLOC_IN_RC(ebp_reg));
-  match(RegP);
-  format %{ "EBP" %}
-  interface(REG_INTER);
-%}
-
 operand eRegL() %{
   constraint(ALLOC_IN_RC(long_reg));
   match(RegL);
@@ -12708,7 +12737,6 @@
 //       compute_padding() functions will have to be adjusted.
 instruct CallStaticJavaDirect(method meth) %{
   match(CallStaticJava);
-  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
   effect(USE meth);
 
   ins_cost(300);
@@ -12722,29 +12750,6 @@
   ins_alignment(4);
 %}
 
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
-  match(CallStaticJava);
-  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
-  effect(USE meth);
-  // EBP is saved by all callees (for interpreter stack correction).
-  // We use it here for a similar purpose, in {preserve,restore}_SP.
-
-  ins_cost(300);
-  format %{ "CALL,static/MethodHandle " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode( pre_call_resets,
-              preserve_SP,
-              Java_Static_Call( meth ),
-              restore_SP,
-              call_epilog,
-              post_call_FPU );
-  ins_pipe( pipe_slow );
-  ins_alignment(4);
-%}
-
 // Call Java Dynamic Instruction
 // Note: If this code changes, the corresponding ret_addr_offset() and
 //       compute_padding() functions will have to be adjusted.
--- a/src/cpu/x86/vm/x86_64.ad	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Wed Jun 10 23:13:44 2015 -0700
@@ -166,42 +166,67 @@
 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 //
 
-// Class for all pointer registers (including RSP)
-reg_class any_reg(RAX, RAX_H,
-                  RDX, RDX_H,
-                  RBP, RBP_H,
-                  RDI, RDI_H,
-                  RSI, RSI_H,
-                  RCX, RCX_H,
-                  RBX, RBX_H,
-                  RSP, RSP_H,
-                  R8,  R8_H,
-                  R9,  R9_H,
-                  R10, R10_H,
-                  R11, R11_H,
-                  R12, R12_H,
-                  R13, R13_H,
-                  R14, R14_H,
-                  R15, R15_H);
-
-// Class for all pointer registers except RSP
-reg_class ptr_reg(RAX, RAX_H,
-                  RDX, RDX_H,
-                  RBP, RBP_H,
-                  RDI, RDI_H,
-                  RSI, RSI_H,
-                  RCX, RCX_H,
-                  RBX, RBX_H,
-                  R8,  R8_H,
-                  R9,  R9_H,
-                  R10, R10_H,
-                  R11, R11_H,
-                  R13, R13_H,
-                  R14, R14_H);
-
-// Class for all pointer registers except RAX and RSP
-reg_class ptr_no_rax_reg(RDX, RDX_H,
-                         RBP, RBP_H,
+// Empty register class.
+reg_class no_reg();
+
+// Class for all pointer registers (including RSP and RBP)
+reg_class any_reg_with_rbp(RAX, RAX_H,
+                           RDX, RDX_H,
+                           RBP, RBP_H,               
+                           RDI, RDI_H,
+                           RSI, RSI_H,
+                           RCX, RCX_H,
+                           RBX, RBX_H,
+                           RSP, RSP_H,
+                           R8,  R8_H,
+                           R9,  R9_H,
+                           R10, R10_H,
+                           R11, R11_H,
+                           R12, R12_H,
+                           R13, R13_H,
+                           R14, R14_H,
+                           R15, R15_H);
+
+// Class for all pointer registers (including RSP, but excluding RBP)
+reg_class any_reg_no_rbp(RAX, RAX_H,
+                         RDX, RDX_H,                
+                         RDI, RDI_H,
+                         RSI, RSI_H,
+                         RCX, RCX_H,
+                         RBX, RBX_H,
+                         RSP, RSP_H,
+                         R8,  R8_H,
+                         R9,  R9_H,
+                         R10, R10_H,
+                         R11, R11_H,
+                         R12, R12_H,
+                         R13, R13_H,
+                         R14, R14_H,
+                         R15, R15_H);
+
+// Dynamic register class that selects at runtime between register classes
+// any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer). 
+// Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
+reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
+                  
+// Class for all pointer registers (excluding RSP)
+reg_class ptr_reg_with_rbp(RAX, RAX_H,
+                           RDX, RDX_H,
+                           RBP, RBP_H,
+                           RDI, RDI_H,
+                           RSI, RSI_H,
+                           RCX, RCX_H,
+                           RBX, RBX_H,
+                           R8,  R8_H,
+                           R9,  R9_H,
+                           R10, R10_H,
+                           R11, R11_H,
+                           R13, R13_H,
+                           R14, R14_H);
+
+// Class for all pointer registers (excluding RSP and RBP)
+reg_class ptr_reg_no_rbp(RAX, RAX_H,
+                         RDX, RDX_H,                         
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
@@ -213,31 +238,66 @@
                          R13, R13_H,
                          R14, R14_H);
 
-reg_class ptr_no_rbp_reg(RDX, RDX_H,
-                         RAX, RAX_H,
-                         RDI, RDI_H,
-                         RSI, RSI_H,
-                         RCX, RCX_H,
-                         RBX, RBX_H,
-                         R8,  R8_H,
-                         R9,  R9_H,
-                         R10, R10_H,
-                         R11, R11_H,
-                         R13, R13_H,
-                         R14, R14_H);
-
-// Class for all pointer registers except RAX, RBX and RSP
-reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
-                             RBP, RBP_H,
-                             RDI, RDI_H,
-                             RSI, RSI_H,
-                             RCX, RCX_H,
-                             R8,  R8_H,
-                             R9,  R9_H,
-                             R10, R10_H,
-                             R11, R11_H,
-                             R13, R13_H,
-                             R14, R14_H);
+// Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp.
+reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX and RSP)
+reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
+                                  RBP, RBP_H,
+                                  RDI, RDI_H,
+                                  RSI, RSI_H,
+                                  RCX, RCX_H,
+                                  RBX, RBX_H,
+                                  R8,  R8_H,
+                                  R9,  R9_H,
+                                  R10, R10_H,
+                                  R11, R11_H,
+                                  R13, R13_H,
+                                  R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RSP, and RBP)
+reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
+                                RDI, RDI_H,
+                                RSI, RSI_H,
+                                RCX, RCX_H,
+                                RBX, RBX_H,
+                                R8,  R8_H,
+                                R9,  R9_H,
+                                R10, R10_H,
+                                R11, R11_H,
+                                R13, R13_H,
+                                R14, R14_H);
+
+// Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX, RBX, and RSP)
+reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
+                                      RBP, RBP_H,
+                                      RDI, RDI_H,
+                                      RSI, RSI_H,
+                                      RCX, RCX_H,
+                                      R8,  R8_H,
+                                      R9,  R9_H,
+                                      R10, R10_H,
+                                      R11, R11_H,
+                                      R13, R13_H,
+                                      R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RBX, RSP, and RBP)
+reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
+                                    RDI, RDI_H,
+                                    RSI, RSI_H,
+                                    RCX, RCX_H,
+                                    R8,  R8_H,
+                                    R9,  R9_H,
+                                    R10, R10_H,
+                                    R11, R11_H,
+                                    R13, R13_H,
+                                    R14, R14_H);
+
+// Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
 
 // Singleton class for RAX pointer register
 reg_class ptr_rax_reg(RAX, RAX_H);
@@ -251,59 +311,29 @@
 // Singleton class for RDI pointer register
 reg_class ptr_rdi_reg(RDI, RDI_H);
 
-// Singleton class for RBP pointer register
-reg_class ptr_rbp_reg(RBP, RBP_H);
-
 // Singleton class for stack pointer
 reg_class ptr_rsp_reg(RSP, RSP_H);
 
 // Singleton class for TLS pointer
 reg_class ptr_r15_reg(R15, R15_H);
 
-// Class for all long registers (except RSP)
-reg_class long_reg(RAX, RAX_H,
-                   RDX, RDX_H,
-                   RBP, RBP_H,
-                   RDI, RDI_H,
-                   RSI, RSI_H,
-                   RCX, RCX_H,
-                   RBX, RBX_H,
-                   R8,  R8_H,
-                   R9,  R9_H,
-                   R10, R10_H,
-                   R11, R11_H,
-                   R13, R13_H,
-                   R14, R14_H);
-
-// Class for all long registers except RAX, RDX (and RSP)
-reg_class long_no_rax_rdx_reg(RBP, RBP_H,
-                              RDI, RDI_H,
-                              RSI, RSI_H,
-                              RCX, RCX_H,
-                              RBX, RBX_H,
-                              R8,  R8_H,
-                              R9,  R9_H,
-                              R10, R10_H,
-                              R11, R11_H,
-                              R13, R13_H,
-                              R14, R14_H);
-
-// Class for all long registers except RCX (and RSP)
-reg_class long_no_rcx_reg(RBP, RBP_H,
-                          RDI, RDI_H,
-                          RSI, RSI_H,
-                          RAX, RAX_H,
-                          RDX, RDX_H,
-                          RBX, RBX_H,
-                          R8,  R8_H,
-                          R9,  R9_H,
-                          R10, R10_H,
-                          R11, R11_H,
-                          R13, R13_H,
-                          R14, R14_H);
-
-// Class for all long registers except RAX (and RSP)
-reg_class long_no_rax_reg(RBP, RBP_H,
+// Class for all long registers (excluding RSP)
+reg_class long_reg_with_rbp(RAX, RAX_H,
+                            RDX, RDX_H,
+                            RBP, RBP_H,
+                            RDI, RDI_H,
+                            RSI, RSI_H,
+                            RCX, RCX_H,
+                            RBX, RBX_H,
+                            R8,  R8_H,
+                            R9,  R9_H,
+                            R10, R10_H,
+                            R11, R11_H,
+                            R13, R13_H,
+                            R14, R14_H);
+
+// Class for all long registers (excluding RSP and RBP)
+reg_class long_reg_no_rbp(RAX, RAX_H,
                           RDX, RDX_H,
                           RDI, RDI_H,
                           RSI, RSI_H,
@@ -316,6 +346,67 @@
                           R13, R13_H,
                           R14, R14_H);
 
+// Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp.
+reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RAX, RDX and RSP)
+reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
+                                       RDI, RDI_H,
+                                       RSI, RSI_H,
+                                       RCX, RCX_H,
+                                       RBX, RBX_H,
+                                       R8,  R8_H,
+                                       R9,  R9_H,
+                                       R10, R10_H,
+                                       R11, R11_H,
+                                       R13, R13_H,
+                                       R14, R14_H);
+
+// Class for all long registers (excluding RAX, RDX, RSP, and RBP)
+reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
+                                     RSI, RSI_H,
+                                     RCX, RCX_H,
+                                     RBX, RBX_H,
+                                     R8,  R8_H,
+                                     R9,  R9_H,
+                                     R10, R10_H,
+                                     R11, R11_H,
+                                     R13, R13_H,
+                                     R14, R14_H);
+
+// Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RCX and RSP)
+reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
+                                   RDI, RDI_H,
+                                   RSI, RSI_H,
+                                   RAX, RAX_H,
+                                   RDX, RDX_H,
+                                   RBX, RBX_H,
+                                   R8,  R8_H,
+                                   R9,  R9_H,
+                                   R10, R10_H,
+                                   R11, R11_H,
+                                   R13, R13_H,
+                                   R14, R14_H);
+
+// Class for all long registers (excluding RCX, RSP, and RBP)
+reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
+                                 RSI, RSI_H,
+                                 RAX, RAX_H,
+                                 RDX, RDX_H,
+                                 RBX, RBX_H,
+                                 R8,  R8_H,
+                                 R9,  R9_H,
+                                 R10, R10_H,
+                                 R11, R11_H,
+                                 R13, R13_H,
+                                 R14, R14_H);
+
+// Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp.
+reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
 // Singleton class for RAX long register
 reg_class long_rax_reg(RAX, RAX_H);
 
@@ -325,27 +416,27 @@
 // Singleton class for RDX long register
 reg_class long_rdx_reg(RDX, RDX_H);
 
-// Class for all int registers (except RSP)
-reg_class int_reg(RAX,
-                  RDX,
-                  RBP,
-                  RDI,
-                  RSI,
-                  RCX,
-                  RBX,
-                  R8,
-                  R9,
-                  R10,
-                  R11,
-                  R13,
-                  R14);
-
-// Class for all int registers except RCX (and RSP)
-reg_class int_no_rcx_reg(RAX,
+// Class for all int registers (excluding RSP)
+reg_class int_reg_with_rbp(RAX,
+                           RDX,
+                           RBP,
+                           RDI,
+                           RSI,
+                           RCX,
+                           RBX,
+                           R8,
+                           R9,
+                           R10,
+                           R11,
+                           R13,
+                           R14);
+
+// Class for all int registers (excluding RSP and RBP)
+reg_class int_reg_no_rbp(RAX,
                          RDX,
-                         RBP,
                          RDI,
                          RSI,
+                         RCX,
                          RBX,
                          R8,
                          R9,
@@ -354,18 +445,66 @@
                          R13,
                          R14);
 
-// Class for all int registers except RAX, RDX (and RSP)
-reg_class int_no_rax_rdx_reg(RBP,
-                             RDI,
-                             RSI,
-                             RCX,
-                             RBX,
-                             R8,
-                             R9,
-                             R10,
-                             R11,
-                             R13,
-                             R14);
+// Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp.
+reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RCX and RSP)
+reg_class int_no_rcx_reg_with_rbp(RAX,
+                                  RDX,
+                                  RBP,
+                                  RDI,
+                                  RSI,
+                                  RBX,
+                                  R8,
+                                  R9,
+                                  R10,
+                                  R11,
+                                  R13,
+                                  R14);
+
+// Class for all int registers (excluding RCX, RSP, and RBP)
+reg_class int_no_rcx_reg_no_rbp(RAX,
+                                RDX,
+                                RDI,
+                                RSI,
+                                RBX,
+                                R8,
+                                R9,
+                                R10,
+                                R11,
+                                R13,
+                                R14);
+
+// Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp.
+reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RAX, RDX, and RSP)
+reg_class int_no_rax_rdx_reg_with_rbp(RBP,
+                                      RDI,
+                                      RSI,
+                                      RCX,
+                                      RBX,
+                                      R8,
+                                      R9,
+                                      R10,
+                                      R11,
+                                      R13,
+                                      R14);
+
+// Class for all int registers (excluding RAX, RDX, RSP, and RBP)
+reg_class int_no_rax_rdx_reg_no_rbp(RDI,
+                                    RSI,
+                                    RCX,
+                                    RBX,
+                                    R8,
+                                    R9,
+                                    R10,
+                                    R11,
+                                    R13,
+                                    R14);
+
+// Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 
 // Singleton class for RAX int register
 reg_class int_rax_reg(RAX);
@@ -396,9 +535,6 @@
 
 #define __ _masm.
 
-static int preserve_SP_size() {
-  return 3;  // rex.w, op, rm(reg/reg)
-}
 static int clear_avx_size() {
   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 }
@@ -409,9 +545,7 @@
 int MachCallStaticJavaNode::ret_addr_offset()
 {
   int offset = 5; // 5 bytes from start of call to where return address points
-  offset += clear_avx_size();
-  if (_method_handle_invoke)
-    offset += preserve_SP_size();
+  offset += clear_avx_size();  
   return offset;
 }
 
@@ -450,16 +584,6 @@
 
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const
-{
-  current_offset += preserve_SP_size();   // skip mov rbp, rsp
-  current_offset += clear_avx_size(); // skip vzeroupper
-  current_offset += 1; // skip call opcode byte
-  return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 {
   current_offset += clear_avx_size(); // skip vzeroupper
@@ -724,6 +848,10 @@
     st->print("# stack bang (%d bytes)", bangsize);
     st->print("\n\t");
     st->print("pushq   rbp\t# Save rbp");
+    if (PreserveFramePointer) {
+        st->print("\n\t");
+        st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
+    }
     if (framesize) {
       st->print("\n\t");
       st->print("subq    rsp, #%d\t# Create frame",framesize);
@@ -732,7 +860,11 @@
     st->print("subq    rsp, #%d\t# Create frame",framesize);
     st->print("\n\t");
     framesize -= wordSize;
-    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
+    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);    
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("movq    rbp, [rsp + #%d]\t# Save the caller's SP into rbp", (framesize + wordSize));
+    }
   }
 
   if (VerifyStackAtCalls) {
@@ -1598,8 +1730,9 @@
   return LONG_RDX_REG_mask();
 }
 
+// Register for saving SP into on method handle invokes. Not used on x86_64.
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return PTR_RBP_REG_mask();
+    return NO_REG_mask();
 }
 
 %}
@@ -3202,7 +3335,7 @@
 // Pointer Register
 operand any_RegP()
 %{
-  constraint(ALLOC_IN_RC(any_reg));
+  constraint(ALLOC_IN_RC(any_reg));  
   match(RegP);
   match(rax_RegP);
   match(rbx_RegP);
@@ -3224,8 +3357,8 @@
   match(rbx_RegP);
   match(rdi_RegP);
   match(rsi_RegP);
-  match(rbp_RegP);
-  match(r15_RegP);  // See Q&A below about r15_RegP.
+  match(rbp_RegP);  // See Q&A below about
+  match(r15_RegP);  // r15_RegP and rbp_RegP.
 
   format %{ %}
   interface(REG_INTER);
@@ -3241,11 +3374,14 @@
 
 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
-// It's fine for an instruction input which expects rRegP to match a r15_RegP.
+// It's fine for an instruction input that expects rRegP to match a r15_RegP.
 // The output of an instruction is controlled by the allocator, which respects
 // register class masks, not match rules.  Unless an instruction mentions
 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 // by the allocator as an input.
+// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
+// the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
+// result, RBP is not included in the output of the instruction either.
 
 operand no_rax_RegP()
 %{
@@ -3259,9 +3395,11 @@
   interface(REG_INTER);
 %}
 
+// This operand is not allowed to use RBP even if
+// RBP is not used to hold the frame pointer.
 operand no_rbp_RegP()
 %{
-  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
+  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
   match(RegP);
   match(rbx_RegP);
   match(rsi_RegP);
@@ -3338,16 +3476,6 @@
   interface(REG_INTER);
 %}
 
-operand rbp_RegP()
-%{
-  constraint(ALLOC_IN_RC(ptr_rbp_reg));
-  match(RegP);
-  match(rRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
 operand r15_RegP()
 %{
   constraint(ALLOC_IN_RC(ptr_r15_reg));
@@ -11414,7 +11542,6 @@
 //       compute_padding() functions will have to be adjusted.
 instruct CallStaticJavaDirect(method meth) %{
   match(CallStaticJava);
-  predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
   effect(USE meth);
 
   ins_cost(300);
@@ -11425,27 +11552,6 @@
   ins_alignment(4);
 %}
 
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
-  match(CallStaticJava);
-  predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
-  effect(USE meth);
-  // RBP is saved by all callees (for interpreter stack correction).
-  // We use it here for a similar purpose, in {preserve,restore}_SP.
-
-  ins_cost(300);
-  format %{ "call,static/MethodHandle " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode(clear_avx, preserve_SP,
-             Java_Static_Call(meth),
-             restore_SP,
-             call_epilog);
-  ins_pipe(pipe_slow);
-  ins_alignment(4);
-%}
-
 // Call Java Dynamic Instruction
 // Note: If this code changes, the corresponding ret_addr_offset() and
 //       compute_padding() functions will have to be adjusted.
--- a/src/cpu/zero/vm/globals_zero.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/cpu/zero/vm/globals_zero.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -61,6 +61,8 @@
 
 define_pd_global(uintx, TypeProfileLevel, 0);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct)
 
 #endif // CPU_ZERO_VM_GLOBALS_ZERO_HPP
--- a/src/share/vm/adlc/adlparse.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/adlparse.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -800,6 +800,7 @@
       }
       if (strcmp(token,"reg_def")==0)          { reg_def_parse(); }
       else if (strcmp(token,"reg_class")==0)   { reg_class_parse(); }
+      else if (strcmp(token, "reg_class_dynamic") == 0) { reg_class_dynamic_parse(); }
       else if (strcmp(token,"alloc_class")==0) { alloc_class_parse(); }
       else if (strcmp(token,"#define")==0)     { preproc_define(); }
       else { parse_err(SYNERR, "bad token %s inside register block.\n", token); break; }
@@ -2323,11 +2324,12 @@
   // Debug Stuff
   if (_AD._adl_debug >1) fprintf(stderr,"Register Class: %s\n", cname);
 
-  RegClass *reg_class = _AD._register->addRegClass(cname);
-
-  // Collect registers in class
   skipws();
   if (_curchar == '(') {
+    // A register list is defined for the register class.
+    // Collect registers into a generic RegClass register class.
+    RegClass* reg_class = _AD._register->addRegClass<RegClass>(cname);
+
     next_char();                  // Skip '('
     skipws();
     while (_curchar != ')') {
@@ -2352,12 +2354,15 @@
     }
     next_char();                  // Skip closing ')'
   } else if (_curchar == '%') {
+    // A code snippet is defined for the register class.
+    // Collect the code snippet into a CodeSnippetRegClass register class.
+    CodeSnippetRegClass* reg_class = _AD._register->addRegClass<CodeSnippetRegClass>(cname);
     char *code = find_cpp_block("reg class");
     if (code == NULL) {
       parse_err(SYNERR, "missing code declaration for reg class.\n");
       return;
     }
-    reg_class->_user_defined = code;
+    reg_class->set_code_snippet(code);
     return;
   }
 
@@ -2374,6 +2379,87 @@
   return;
 }
 
+//------------------------------reg_class_dynamic_parse------------------------
+void ADLParser::reg_class_dynamic_parse(void) {
+  char *cname; // Name of dynamic register class being defined
+
+  // Get register class name
+  skipws();
+  cname = get_ident();
+  if (cname == NULL) {
+    parse_err(SYNERR, "missing dynamic register class name after 'reg_class_dynamic'\n");
+    return;
+  }
+
+  if (_AD._adl_debug > 1) {
+    fprintf(stdout, "Dynamic Register Class: %s\n", cname);
+  }
+
+  skipws();
+  if (_curchar != '(') {
+    parse_err(SYNERR, "missing '(' at the beginning of reg_class_dynamic definition\n");
+    return;
+  }
+  next_char();
+  skipws();
+
+  // Collect two register classes and the C++ code representing the condition code used to
+  // select between the two classes into a ConditionalRegClass register class.
+  ConditionalRegClass* reg_class = _AD._register->addRegClass<ConditionalRegClass>(cname);
+  int i;
+  for (i = 0; i < 2; i++) {
+    char* name = get_ident();
+    if (name == NULL) {
+      parse_err(SYNERR, "missing class identifier inside reg_class_dynamic list.\n");
+      return;
+    }
+    RegClass* rc = _AD._register->getRegClass(name);
+    if (rc == NULL) {
+      parse_err(SEMERR, "unknown identifier %s inside reg_class_dynamic list.\n", name);
+    } else {
+      reg_class->set_rclass_at_index(i, rc);
+    }
+
+    skipws();
+    if (_curchar == ',') {
+      next_char();
+      skipws();
+    } else {
+      parse_err(SYNERR, "missing separator ',' inside reg_class_dynamic list.\n");
+    }
+  }
+
+  // Collect the condition code.
+  skipws();
+  if (_curchar == '%') {
+    char* code = find_cpp_block("reg class dynamic");
+    if (code == NULL) {
+       parse_err(SYNERR, "missing code declaration for reg_class_dynamic.\n");
+       return;
+    }
+    reg_class->set_condition_code(code);
+  } else {
+    parse_err(SYNERR, "missing %% at the beginning of code block in reg_class_dynamic definition\n");
+    return;
+  }
+
+  skipws();
+  if (_curchar != ')') {
+    parse_err(SYNERR, "missing ')' at the end of reg_class_dynamic definition\n");
+    return;
+  }
+  next_char();
+
+  skipws();
+  if (_curchar != ';') {
+    parse_err(SYNERR, "missing ';' at the end of reg_class_dynamic definition.\n");
+    return;
+  }
+  next_char();                    // Skip trailing ';'
+
+  return;
+}
+
 //------------------------------alloc_class_parse------------------------------
 void ADLParser::alloc_class_parse(void) {
   char *name;                     // Name of allocation class being defined
--- a/src/share/vm/adlc/adlparse.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/adlparse.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -53,6 +53,8 @@
 // ***** Register Section *****
 class RegDef;
 class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
 class AllocClass;
 class ResourceForm;
 // ***** Pipeline Section *****
@@ -127,6 +129,7 @@
   // Parse components of the register section
   void reg_def_parse(void);              // Parse register definition
   void reg_class_parse(void);            // Parse register class definition
+  void reg_class_dynamic_parse(void);    // Parse dynamic register class definition
   void alloc_class_parse(void);          // Parse allocation class definition
 
   // Parse components of the definition section
--- a/src/share/vm/adlc/archDesc.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/archDesc.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -934,7 +934,7 @@
 void ArchDesc::set_stack_or_reg(const char *reg_class_name) {
   if( _register ) {
     RegClass *reg_class  = _register->getRegClass(reg_class_name);
-    reg_class->_stack_or_reg = true;
+    reg_class->set_stack_version(true);
   }
 }
 
--- a/src/share/vm/adlc/forms.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/forms.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -68,6 +68,8 @@
 class InsEncode;
 class RegDef;
 class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
 class AllocClass;
 class ResourceForm;
 class PipeClassForm;
--- a/src/share/vm/adlc/formsopt.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/formsopt.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -47,13 +47,19 @@
 }
 
 // record a new register class
-RegClass *RegisterForm::addRegClass(const char *className) {
-  RegClass *regClass = new RegClass(className);
+template <typename T>
+T* RegisterForm::addRegClass(const char* className) {
+  T* regClass = new T(className);
   _rclasses.addName(className);
-  _regClass.Insert(className,regClass);
+  _regClass.Insert(className, regClass);
   return regClass;
 }
 
+// Explicit instantiation for all supported register classes.
+template RegClass* RegisterForm::addRegClass<RegClass>(const char* className);
+template CodeSnippetRegClass* RegisterForm::addRegClass<CodeSnippetRegClass>(const char* className);
+template ConditionalRegClass* RegisterForm::addRegClass<ConditionalRegClass>(const char* className);
+
 // record a new register class
 AllocClass *RegisterForm::addAllocClass(char *className) {
   AllocClass *allocClass = new AllocClass(className);
@@ -67,9 +73,9 @@
 void RegisterForm::addSpillRegClass() {
   // Stack slots start at the next available even register number.
   _reg_ctr = (_reg_ctr+7) & ~7;
-  const char *rc_name   = "stack_slots";
-  RegClass   *reg_class = new RegClass(rc_name);
-  reg_class->_stack_or_reg = true;
+  const char *rc_name = "stack_slots";
+  RegClass* reg_class = new RegClass(rc_name);
+  reg_class->set_stack_version(true);
   _rclasses.addName(rc_name);
   _regClass.Insert(rc_name,reg_class);
 }
@@ -224,9 +230,11 @@
 
 //------------------------------RegClass---------------------------------------
 // Construct a register class into which registers will be inserted
-RegClass::RegClass(const char *classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr,hashstr, Form::arena),
-                                          _user_defined(NULL)
-{
+RegClass::RegClass(const char* classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr, hashstr, Form::arena) {
+}
+
+RegClass::~RegClass() {
+  delete _classid;
 }
 
 // record a register in this class
@@ -305,6 +313,91 @@
   fprintf(fp,"--- done with entries for reg_class %s\n\n",_classid);
 }
 
+void RegClass::declare_register_masks(FILE* fp) {
+  const char* prefix = "";
+  const char* rc_name_to_upper = toUpper(_classid);
+  fprintf(fp, "extern const RegMask _%s%s_mask;\n", prefix,  rc_name_to_upper);
+  fprintf(fp, "inline const RegMask &%s%s_mask() { return _%s%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
+  if (_stack_or_reg) {
+    fprintf(fp, "extern const RegMask _%sSTACK_OR_%s_mask;\n", prefix, rc_name_to_upper);
+    fprintf(fp, "inline const RegMask &%sSTACK_OR_%s_mask() { return _%sSTACK_OR_%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
+  }
+  delete[] rc_name_to_upper;
+}
+
+void RegClass::build_register_masks(FILE* fp) {
+  int len = RegisterForm::RegMask_Size();
+  const char *prefix = "";
+  const char* rc_name_to_upper = toUpper(_classid);
+  fprintf(fp, "const RegMask _%s%s_mask(", prefix, rc_name_to_upper);
+
+  int i;
+  for(i = 0; i < len - 1; i++) {
+    fprintf(fp," 0x%x,", regs_in_word(i, false));
+  }
+  fprintf(fp," 0x%x );\n", regs_in_word(i, false));
+
+  if (_stack_or_reg) {
+    fprintf(fp, "const RegMask _%sSTACK_OR_%s_mask(", prefix, rc_name_to_upper);
+    for(i = 0; i < len - 1; i++) {
+      fprintf(fp," 0x%x,", regs_in_word(i, true));
+    }
+    fprintf(fp," 0x%x );\n", regs_in_word(i, true));
+  }
+  delete[] rc_name_to_upper;
+}
+
+//------------------------------CodeSnippetRegClass---------------------------
+CodeSnippetRegClass::CodeSnippetRegClass(const char* classid) : RegClass(classid), _code_snippet(NULL) {
+}
+
+CodeSnippetRegClass::~CodeSnippetRegClass() {
+  delete _code_snippet;
+}
+
+void CodeSnippetRegClass::declare_register_masks(FILE* fp) {
+  const char* prefix = "";
+  const char* rc_name_to_upper = toUpper(_classid);
+  fprintf(fp, "inline const RegMask &%s%s_mask() { %s }\n", prefix, rc_name_to_upper, _code_snippet);
+  delete[] rc_name_to_upper;
+}
+
+//------------------------------ConditionalRegClass---------------------------
+ConditionalRegClass::ConditionalRegClass(const char *classid) : RegClass(classid), _condition_code(NULL) {
+}
+
+ConditionalRegClass::~ConditionalRegClass() {
+  delete _condition_code;
+}
+
+void ConditionalRegClass::declare_register_masks(FILE* fp) {
+  const char* prefix = "";
+  const char* rc_name_to_upper = toUpper(_classid);
+  const char* rclass_0_to_upper = toUpper(_rclasses[0]->_classid);
+  const char* rclass_1_to_upper = toUpper(_rclasses[1]->_classid);
+  fprintf(fp, "inline const RegMask &%s%s_mask() {"
+              " return (%s) ?"
+              " %s%s_mask() :"
+              " %s%s_mask(); }\n",
+              prefix, rc_name_to_upper,
+              _condition_code,
+              prefix, rclass_0_to_upper,
+              prefix, rclass_1_to_upper);
+  if (_stack_or_reg) {
+    fprintf(fp, "inline const RegMask &%sSTACK_OR_%s_mask() {"
+                  " return (%s) ?"
+                  " %sSTACK_OR_%s_mask() :"
+                  " %sSTACK_OR_%s_mask(); }\n",
+                  prefix, rc_name_to_upper,
+                  _condition_code,
+                  prefix, rclass_0_to_upper,
+                  prefix, rclass_1_to_upper);
+  }
+  delete[] rc_name_to_upper;
+  delete[] rclass_0_to_upper;
+  delete[] rclass_1_to_upper;
+  return;
+}
 
 //------------------------------AllocClass-------------------------------------
 AllocClass::AllocClass(char *classid) : _classid(classid), _regDef(cmpstr,hashstr, Form::arena) {
--- a/src/share/vm/adlc/formsopt.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/formsopt.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -60,6 +60,8 @@
 class InsEncode;
 class RegDef;
 class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
 class AllocClass;
 class ResourceForm;
 class PipeClassForm;
@@ -98,7 +100,8 @@
 
   void        addRegDef(char *regName, char *callingConv, char *c_conv,
                         char * idealtype, char *encoding, char* concreteName);
-  RegClass   *addRegClass(const char *className);
+  template<typename T> T* addRegClass(const char* className);
+
   AllocClass *addAllocClass(char *allocName);
   void        addSpillRegClass();
 
@@ -154,17 +157,28 @@
 };
 
 //------------------------------RegClass---------------------------------------
+// Generic register class. This register class is the internal representation
+// for the following .ad file format:
+//
+//  reg_class ptr(RAX, RBX, ...);
+//
+// where ptr is the name of the register class, RAX and RBX are registers.
+//
+// This register class allows registers to be spilled onto the stack. Spilling
+// is allowed is field _stack_or_reg is true.
 class RegClass : public Form {
 public:
   // Public Data
   const char *_classid;         // Name of class
   NameList    _regDefs;         // List of registers in class
   Dict        _regDef;          // Dictionary of registers in class
+protected:
   bool        _stack_or_reg;    // Allowed on any stack slot
-  char*       _user_defined;
 
+public:
   // Public Methods
   RegClass(const char *classid);// Constructor
+  virtual ~RegClass();
 
   void addReg(RegDef *regDef);  // Add a register to this class
 
@@ -183,6 +197,115 @@
 
   void dump();                  // Debug printer
   void output(FILE *fp);        // Write info to output files
+
+  virtual bool has_stack_version() {
+    return _stack_or_reg;
+  }
+  virtual void set_stack_version(bool flag) {
+    _stack_or_reg = flag;
+  }
+
+  virtual void declare_register_masks(FILE* fp);
+  virtual void build_register_masks(FILE* fp);
+};
+
+//------------------------------CodeSnippetRegClass----------------------------
+// Register class that has an user-defined C++ code snippet attached to it
+// to determine at runtime which register class to use. This register class is
+// the internal representation for the following .ad file format:
+//
+//  reg_class actual_dflt_reg %{
+//      if (VM_Version::has_vfp3_32()) {
+//          return DFLT_REG_mask();
+//      } else {
+//          return DFLT_LOW_REG_mask();
+//      }
+//  %}
+//
+// where DFLT_REG_mask() and DFLT_LOW_REG_mask() are the internal names of the
+// masks of register classes dflt_reg and dflt_low_reg.
+//
+// The attached code snippet can select also between more than two register classes.
+// This register class can be, however, used only if the register class is not
+// cisc-spillable (i.e., the registers of this class are not allowed on the stack,
+// which is equivalent with _stack_or_reg being false).
+class CodeSnippetRegClass : public RegClass {
+protected:
+  char* _code_snippet;
+public:
+  CodeSnippetRegClass(const char* classid);// Constructor
+  ~CodeSnippetRegClass();
+
+  void set_code_snippet(char* code) {
+    _code_snippet = code;
+  }
+  char* code_snippet() {
+    return _code_snippet;
+  }
+  void set_stack_version(bool flag) {
+    assert(false, "User defined register classes are not allowed to spill to the stack.");
+  }
+  void declare_register_masks(FILE* fp);
+  void build_register_masks(FILE* fp) {
+    // We do not need to generate register masks because we select at runtime
+    // between register masks generated for other register classes.
+    return;
+  }
+};
+
+//------------------------------ConditionalRegClass----------------------------
+// Register class that has two register classes and a runtime condition attached
+// to it. The condition is evaluated at runtime and either one of the register
+// attached register classes is selected. This register class is the internal
+// representation for the following .ad format:
+//
+//  reg_class_dynamic actual_dflt_reg(dflt_reg, low_reg,
+//                                    %{ VM_Version::has_vfp3_32() }%
+//                                    );
+//
+// This example is equivalent to the example used with the CodeSnippetRegClass
+// register class. A ConditionalRegClass works also if a register class is cisc-spillable
+// (i.e., _stack_or_reg is true), but if can select only between two register classes.
+class ConditionalRegClass : public RegClass {
+protected:
+  // reference to condition code
+  char* _condition_code;  // C++ condition code to dynamically determine which register class to use.
+
+                          // Example syntax (equivalent to previous example):
+                          //
+                          //  reg_class actual_dflt_reg(dflt_reg, low_reg,
+                          //                            %{ VM_Version::has_vfp3_32() }%
+                          //                            );
+  // reference to conditional register classes
+  RegClass* _rclasses[2]; // 0 is the register class selected if the condition code returns true
+                          // 1 is the register class selected if the condition code returns false
+public:
+  ConditionalRegClass(const char* classid);// Constructor
+  ~ConditionalRegClass();
+
+  virtual void set_stack_version(bool flag) {
+    RegClass::set_stack_version(flag);
+    assert((_rclasses[0] != NULL), "Register class NULL for condition code == true");
+    assert((_rclasses[1] != NULL), "Register class NULL for condition code == false");
+    _rclasses[0]->set_stack_version(flag);
+    _rclasses[1]->set_stack_version(flag);
+  }
+  void declare_register_masks(FILE* fp);
+  void build_register_masks(FILE* fp) {
+    // We do not need to generate register masks because we select at runtime
+    // between register masks generated for other register classes.
+    return;
+  }
+  void set_rclass_at_index(int index, RegClass* rclass) {
+    assert((0 <= index && index < 2), "Condition code can select only between two register classes");
+    _rclasses[index] = rclass;
+  }
+  void set_condition_code(char* code) {
+    _condition_code = code;
+  }
+  char* condition_code() {
+    return _condition_code;
+  }
 };
 
 //------------------------------AllocClass-------------------------------------
--- a/src/share/vm/adlc/formssel.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/formssel.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -59,6 +59,8 @@
 class InsEncode;
 class RegDef;
 class RegClass;
+class CodeSnippetRegClass;
+class ConditionalRegClass;
 class AllocClass;
 class ResourceForm;
 class PipeDesc;
--- a/src/share/vm/adlc/output_c.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/adlc/output_c.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -155,26 +155,9 @@
     fprintf(fp_hpp,"// Register masks, one for each register class.\n");
     _register->_rclasses.reset();
     for (rc_name = NULL; (rc_name = _register->_rclasses.iter()) != NULL;) {
-      const char *prefix = "";
       RegClass *reg_class = _register->getRegClass(rc_name);
       assert(reg_class, "Using an undefined register class");
-
-      const char* rc_name_to_upper = toUpper(rc_name);
-
-      if (reg_class->_user_defined == NULL) {
-        fprintf(fp_hpp, "extern const RegMask _%s%s_mask;\n", prefix,  rc_name_to_upper);
-        fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { return _%s%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
-      } else {
-        fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { %s }\n", prefix, rc_name_to_upper, reg_class->_user_defined);
-      }
-
-      if (reg_class->_stack_or_reg) {
-        assert(reg_class->_user_defined == NULL, "no user defined reg class here");
-        fprintf(fp_hpp, "extern const RegMask _%sSTACK_OR_%s_mask;\n", prefix, rc_name_to_upper);
-        fprintf(fp_hpp, "inline const RegMask &%sSTACK_OR_%s_mask() { return _%sSTACK_OR_%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
-      }
-      delete[] rc_name_to_upper;
-
+      reg_class->declare_register_masks(fp_hpp);
     }
   }
 }
@@ -190,35 +173,9 @@
     fprintf(fp_cpp,"// Register masks, one for each register class.\n");
     _register->_rclasses.reset();
     for (rc_name = NULL; (rc_name = _register->_rclasses.iter()) != NULL;) {
-      const char *prefix = "";
       RegClass *reg_class = _register->getRegClass(rc_name);
       assert(reg_class, "Using an undefined register class");
-
-      if (reg_class->_user_defined != NULL) {
-        continue;
-      }
-
-      int len = RegisterForm::RegMask_Size();
-      const char* rc_name_to_upper = toUpper(rc_name);
-      fprintf(fp_cpp, "const RegMask _%s%s_mask(", prefix, rc_name_to_upper);
-
-      {
-        int i;
-        for(i = 0; i < len - 1; i++) {
-          fprintf(fp_cpp," 0x%x,", reg_class->regs_in_word(i, false));
-        }
-        fprintf(fp_cpp," 0x%x );\n", reg_class->regs_in_word(i, false));
-      }
-
-      if (reg_class->_stack_or_reg) {
-        int i;
-        fprintf(fp_cpp, "const RegMask _%sSTACK_OR_%s_mask(", prefix, rc_name_to_upper);
-        for(i = 0; i < len - 1; i++) {
-          fprintf(fp_cpp," 0x%x,",reg_class->regs_in_word(i, true));
-        }
-        fprintf(fp_cpp," 0x%x );\n",reg_class->regs_in_word(i, true));
-      }
-      delete[] rc_name_to_upper;
+      reg_class->build_register_masks(fp_cpp);
     }
   }
 }
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -4064,7 +4064,7 @@
       ValueType* type = apop()->type();
       if (type->is_constant()) {
         ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget();
-        // If the target is another method handle invoke try recursivly to get
+        // If the target is another method handle invoke, try to recursively get
         // a better target.
         if (target->is_method_handle_intrinsic()) {
           if (try_method_handle_inline(target)) {
--- a/src/share/vm/c1/c1_LIR.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/c1/c1_LIR.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -454,7 +454,7 @@
 //-------------------visits--------------------------
 
 // complete rework of LIR instruction visitor.
-// The virtual calls for each instruction type is replaced by a big
+// The virtual call for each instruction type is replaced by a big
 // switch that adds the operands for each instruction
 
 void LIR_OpVisitState::visit(LIR_Op* op) {
@@ -823,7 +823,8 @@
       }
 
       if (opJavaCall->_info)                     do_info(opJavaCall->_info);
-      if (opJavaCall->is_method_handle_invoke()) {
+      if (FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr &&
+          opJavaCall->is_method_handle_invoke()) {
         opJavaCall->_method_handle_invoke_SP_save_opr = FrameMap::method_handle_invoke_SP_save_opr();
         do_temp(opJavaCall->_method_handle_invoke_SP_save_opr);
       }
--- a/src/share/vm/c1/c1_LIR.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/c1/c1_LIR.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -1216,10 +1216,8 @@
   // JSR 292 support.
   bool is_invokedynamic() const                  { return code() == lir_dynamic_call; }
   bool is_method_handle_invoke() const {
-    return
-      method()->is_compiled_lambda_form()  // Java-generated adapter
-      ||
-      method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
+    return method()->is_compiled_lambda_form() ||   // Java-generated lambda form
+           method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
   }
 
   intptr_t vtable_offset() const {
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -2895,7 +2895,7 @@
 //   g) lock result registers and emit call operation
 //
 // Before issuing a call, we must spill-save all values on stack
-// that are in caller-save register. "spill-save" moves thos registers
+// that are in caller-save register. "spill-save" moves those registers
 // either in a free callee-save register or spills them if no free
 // callee save register is available.
 //
@@ -2903,7 +2903,7 @@
 // - if invoked between e) and f), we may lock callee save
 //   register in "spill-save" that destroys the receiver register
 //   before f) is executed
-// - if we rearange the f) to be earlier, by loading %o0, it
+// - if we rearrange f) to be earlier (by loading %o0) it
 //   may destroy a value on the stack that is currently in %o0
 //   and is waiting to be spilled
 // - if we keep the receiver locked while doing spill-save,
@@ -2936,14 +2936,16 @@
   assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match");
 
   // JSR 292
-  // Preserve the SP over MethodHandle call sites.
+  // Preserve the SP over MethodHandle call sites, if needed.
   ciMethod* target = x->target();
   bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant?
                                   target->is_method_handle_intrinsic() ||
                                   target->is_compiled_lambda_form());
   if (is_method_handle_invoke) {
     info->set_is_method_handle_invoke(true);
-    __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
+    if(FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
+        __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
+    }
   }
 
   switch (x->code()) {
@@ -2983,8 +2985,9 @@
   }
 
   // JSR 292
-  // Restore the SP after MethodHandle call sites.
-  if (is_method_handle_invoke) {
+  // Restore the SP after MethodHandle call sites, if needed.
+  if (is_method_handle_invoke
+      && FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
     __ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer());
   }
 
--- a/src/share/vm/memory/metaspace.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/memory/metaspace.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -622,8 +622,7 @@
   Metachunk* _chunks_in_use[NumberOfInUseLists];
   Metachunk* _current_chunk;
 
-  // Number of small chunks to allocate to a manager
-  // If class space manager, small chunks are unlimited
+  // Maximum number of small chunks to allocate to a SpaceManager
   static uint const _small_chunk_limit;
 
   // Sum of all space in allocated chunks
@@ -737,6 +736,8 @@
   // Block allocation and deallocation.
   // Allocates a block from the current chunk
   MetaWord* allocate(size_t word_size);
+  // Allocates a block from a small chunk
+  MetaWord* get_small_chunk_and_allocate(size_t word_size);
 
   // Helper for allocations
   MetaWord* allocate_work(size_t word_size);
@@ -2031,9 +2032,8 @@
 size_t SpaceManager::calc_chunk_size(size_t word_size) {
 
   // Decide between a small chunk and a medium chunk.  Up to
-  // _small_chunk_limit small chunks can be allocated but
-  // once a medium chunk has been allocated, no more small
-  // chunks will be allocated.
+  // _small_chunk_limit small chunks can be allocated.
+  // After that a medium chunk is preferred.
   size_t chunk_word_size;
   if (chunks_in_use(MediumIndex) == NULL &&
       sum_count_in_chunks_in_use(SmallIndex) < _small_chunk_limit) {
@@ -2101,7 +2101,7 @@
                             word_size, words_used, words_left);
   }
 
-  // Get another chunk out of the virtual space
+  // Get another chunk
   size_t grow_chunks_by_words = calc_chunk_size(word_size);
   Metachunk* next = get_new_chunk(word_size, grow_chunks_by_words);
 
@@ -2432,6 +2432,43 @@
   return next;
 }
 
+/*
+ * The policy is to allocate up to _small_chunk_limit small chunks
+ * after which only medium chunks are allocated.  This is done to
+ * reduce fragmentation.  In some cases, this can result in a lot
+ * of small chunks being allocated to the point where it's not
+ * possible to expand.  If this happens, there may be no medium chunks
+ * available and OOME would be thrown.  Instead of doing that,
+ * if the allocation request size fits in a small chunk, an attempt
+ * will be made to allocate a small chunk.
+ */
+MetaWord* SpaceManager::get_small_chunk_and_allocate(size_t word_size) {
+  if (word_size + Metachunk::overhead() > small_chunk_size()) {
+    return NULL;
+  }
+
+  MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
+  MutexLockerEx cl1(expand_lock(), Mutex::_no_safepoint_check_flag);
+
+  Metachunk* chunk = chunk_manager()->chunk_freelist_allocate(small_chunk_size());
+
+  MetaWord* mem = NULL;
+
+  if (chunk != NULL) {
+    // Add chunk to the in-use chunk list and do an allocation from it.
+    // Add to this manager's list of chunks in use.
+    add_chunk(chunk, false);
+    mem = chunk->allocate(word_size);
+
+    inc_used_metrics(word_size);
+
+    // Track metaspace memory usage statistic.
+    track_metaspace_memory_usage();
+  }
+
+  return mem;
+}
+
 MetaWord* SpaceManager::allocate(size_t word_size) {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
 
@@ -3511,7 +3548,18 @@
   }
 
   if (result == NULL) {
-    report_metadata_oome(loader_data, word_size, type, mdtype, CHECK_NULL);
+    SpaceManager* sm;
+    if (is_class_space_allocation(mdtype)) {
+      sm = loader_data->metaspace_non_null()->class_vsm();
+    } else {
+      sm = loader_data->metaspace_non_null()->vsm();
+    }
+
+    result = sm->get_small_chunk_and_allocate(word_size);
+
+    if (result == NULL) {
+      report_metadata_oome(loader_data, word_size, type, mdtype, CHECK_NULL);
+    }
   }
 
   // Zero initialize.
--- a/src/share/vm/opto/bytecodeInfo.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -608,11 +608,11 @@
   }
   int max_inline_level_adjust = 0;
   if (caller_jvms->method() != NULL) {
-    if (caller_jvms->method()->is_compiled_lambda_form())
+    if (caller_jvms->method()->is_compiled_lambda_form()) {
       max_inline_level_adjust += 1;  // don't count actions in MH or indy adapter frames
-    else if (callee_method->is_method_handle_intrinsic() ||
-             callee_method->is_compiled_lambda_form()) {
-      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implem
+    } else if (callee_method->is_method_handle_intrinsic() ||
+               callee_method->is_compiled_lambda_form()) {
+      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implementation
     }
     if (max_inline_level_adjust != 0 && C->print_inlining() && (Verbose || WizardMode)) {
       CompileTask::print_inline_indent(inline_level());
--- a/src/share/vm/opto/graphKit.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -1452,16 +1452,18 @@
 // factory methods in "int adr_idx"
 Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                           int adr_idx,
-                          MemNode::MemOrd mo, bool require_atomic_access) {
+                          MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency, bool require_atomic_access) {
   assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
   const TypePtr* adr_type = NULL; // debug-mode-only argument
   debug_only(adr_type = C->get_adr_type(adr_idx));
   Node* mem = memory(adr_idx);
   Node* ld;
   if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, mo);
+    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, mo, control_dependency);
+  } else if (require_atomic_access && bt == T_DOUBLE) {
+    ld = LoadDNode::make_atomic(C, ctl, mem, adr, adr_type, t, mo, control_dependency);
   } else {
-    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo);
+    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency);
   }
   ld = _gvn.transform(ld);
   if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) {
@@ -1482,6 +1484,8 @@
   Node* st;
   if (require_atomic_access && bt == T_LONG) {
     st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val, mo);
+  } else if (require_atomic_access && bt == T_DOUBLE) {
+    st = StoreDNode::make_atomic(C, ctl, mem, adr, adr_type, val, mo);
   } else {
     st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
   }
--- a/src/share/vm/opto/graphKit.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/graphKit.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -516,21 +516,24 @@
   // adapted the `do_put_xxx' and `do_get_xxx' procedures for the case
   // of volatile fields.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
-                  MemNode::MemOrd mo, bool require_atomic_access = false) {
+                  MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
+                  bool require_atomic_access = false) {
     // This version computes alias_index from bottom_type
     return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(),
-                     mo, require_atomic_access);
+                     mo, control_dependency, require_atomic_access);
   }
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type,
-                  MemNode::MemOrd mo, bool require_atomic_access = false) {
+                  MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
+                  bool require_atomic_access = false) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other make_load factory");
     return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type),
-                     mo, require_atomic_access);
+                     mo, control_dependency, require_atomic_access);
   }
   // This is the base version which is given an alias index.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx,
-                  MemNode::MemOrd mo, bool require_atomic_access = false);
+                  MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest,
+                  bool require_atomic_access = false);
 
   // Create & transform a StoreNode and store the effect into the
   // parser's memory state.
--- a/src/share/vm/opto/library_call.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/library_call.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -2669,7 +2669,9 @@
 
   if (!is_store) {
     MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
-    Node* p = make_load(control(), adr, value_type, type, adr_type, mo, is_volatile);
+    // To be valid, unsafe loads may depend on other conditions than
+    // the one that guards them: pin the Load node
+    Node* p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile);
     // load value
     switch (type) {
     case T_BOOLEAN:
@@ -6038,7 +6040,7 @@
   }
   // Build the load.
   MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
-  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, mo, is_vol);
+  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, is_vol);
   // If reference is volatile, prevent following memory ops from
   // floating up past the volatile read.  Also prevents commoning
   // another volatile read.
--- a/src/share/vm/opto/loopPredicate.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/loopPredicate.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -438,7 +438,13 @@
           }
         }
         if (all_inputs_invariant) {
-          _invariant.set(n->_idx); // I am a invariant too
+          // If n's control is a predicate that was moved out of the
+          // loop, it was marked invariant but n is only invariant if
+          // it depends only on that test. Otherwise, unless that test
+          // is out of the loop, it's not invariant.
+          if (n->is_CFG() || n->depends_only_on_test() || n->in(0) == NULL || !_phase->is_member(_lpt, n->in(0))) {
+            _invariant.set(n->_idx); // I am a invariant too
+          }
         }
       } else { // process next input
         _stack.set_index(idx + 1);
--- a/src/share/vm/opto/matcher.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/matcher.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -840,7 +840,7 @@
   MachNode *spillCP = match_tree(new (C) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
 #endif
   MachNode *spillI  = match_tree(new (C) LoadINode(NULL,mem,fp,atp,TypeInt::INT,MemNode::unordered));
-  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp,TypeLong::LONG,MemNode::unordered,false));
+  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp,TypeLong::LONG,MemNode::unordered, LoadNode::DependsOnlyOnTest,false));
   MachNode *spillF  = match_tree(new (C) LoadFNode(NULL,mem,fp,atp,Type::FLOAT,MemNode::unordered));
   MachNode *spillD  = match_tree(new (C) LoadDNode(NULL,mem,fp,atp,Type::DOUBLE,MemNode::unordered));
   MachNode *spillP  = match_tree(new (C) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
--- a/src/share/vm/opto/memnode.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/memnode.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -878,6 +878,9 @@
     // standard dump does this in Verbose and WizardMode
     st->print(" #"); _type->dump_on(st);
   }
+  if (!_depends_only_on_test) {
+    st->print(" (does not depend only on test)");
+  }
 }
 #endif
 
@@ -894,7 +897,7 @@
 
 //----------------------------LoadNode::make-----------------------------------
 // Polymorphic factory method:
-Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo) {
+Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, ControlDependency control_dependency) {
   Compile* C = gvn.C;
 
   // sanity check the alias category against the created node type
@@ -910,36 +913,40 @@
           rt->isa_oopptr() || is_immutable_value(adr),
           "raw memory operations should have control edge");
   switch (bt) {
-  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  mo);
-  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
-  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
-  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  mo);
-  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
-  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo);
-  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt,            mo);
-  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt,            mo);
-  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  mo);
+  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
+  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
+  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
+  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
+  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
+  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency);
+  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency);
+  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency);
+  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  mo, control_dependency);
   case T_OBJECT:
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
-      Node* load  = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo));
+      Node* load  = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo, control_dependency));
       return new (C) DecodeNNode(load, load->bottom_type()->make_ptr());
     } else
 #endif
     {
       assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
-      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo);
+      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency);
     }
   }
   ShouldNotReachHere();
   return (LoadNode*)NULL;
 }
 
-LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo) {
+LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) {
   bool require_atomic = true;
-  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, require_atomic);
+  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic);
 }
 
+LoadDNode* LoadDNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) {
+  bool require_atomic = true;
+  return new (C) LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic);
+}
 
 
 
@@ -2398,6 +2405,11 @@
   return new (C) StoreLNode(ctl, mem, adr, adr_type, val, mo, require_atomic);
 }
 
+StoreDNode* StoreDNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
+  bool require_atomic = true;
+  return new (C) StoreDNode(ctl, mem, adr, adr_type, val, mo, require_atomic);
+}
+
 
 //--------------------------bottom_type----------------------------------------
 const Type *StoreNode::bottom_type() const {
--- a/src/share/vm/opto/memnode.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/memnode.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -138,7 +138,33 @@
 //------------------------------LoadNode---------------------------------------
 // Load value; requires Memory and Address
 class LoadNode : public MemNode {
+public:
+  // Some loads (from unsafe) should be pinned: they don't depend only
+  // on the dominating test.  The boolean field _depends_only_on_test
+  // below records whether that node depends only on the dominating
+  // test.
+  // Methods used to build LoadNodes pass an argument of type enum
+  // ControlDependency instead of a boolean because those methods
+  // typically have multiple boolean parameters with default values:
+  // passing the wrong boolean to one of these parameters by mistake
+  // goes easily unnoticed. Using an enum, the compiler can check that
+  // the type of a value and the type of the parameter match.
+  enum ControlDependency {
+    Pinned,
+    DependsOnlyOnTest
+  };
 private:
+  // LoadNode::hash() doesn't take the _depends_only_on_test field
+  // into account: If the graph already has a non-pinned LoadNode and
+  // we add a pinned LoadNode with the same inputs, it's safe for GVN
+  // to replace the pinned LoadNode with the non-pinned LoadNode,
+  // otherwise it wouldn't be safe to have a non pinned LoadNode with
+  // those inputs in the first place. If the graph already has a
+  // pinned LoadNode and we add a non pinned LoadNode with the same
+  // inputs, it's safe (but suboptimal) for GVN to replace the
+  // non-pinned LoadNode by the pinned LoadNode.
+  bool _depends_only_on_test;
+
   // On platforms with weak memory ordering (e.g., PPC, Ia64) we distinguish
   // loads that can be reordered, and such requiring acquire semantics to
   // adhere to the Java specification.  The required behaviour is stored in
@@ -153,8 +179,8 @@
   const Type* const _type;      // What kind of value is loaded?
 public:
 
-  LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo)
-    : MemNode(c,mem,adr,at), _type(rt), _mo(mo) {
+  LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo, ControlDependency control_dependency)
+    : MemNode(c,mem,adr,at), _type(rt), _mo(mo), _depends_only_on_test(control_dependency == DependsOnlyOnTest) {
     init_class_id(Class_Load);
   }
   inline bool is_unordered() const { return !is_acquire(); }
@@ -165,7 +191,8 @@
 
   // Polymorphic factory method:
    static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
-                     const TypePtr* at, const Type *rt, BasicType bt, MemOrd mo);
+                     const TypePtr* at, const Type *rt, BasicType bt,
+                     MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest);
 
   virtual uint hash()   const;  // Check the type
 
@@ -233,16 +260,15 @@
   // which produce results (new raw memory state) inside of loops preventing all
   // manner of other optimizations).  Basically, it's ugly but so is the alternative.
   // See comment in macro.cpp, around line 125 expand_allocate_common().
-  virtual bool depends_only_on_test() const { return adr_type() != TypeRawPtr::BOTTOM; }
-
+  virtual bool depends_only_on_test() const { return adr_type() != TypeRawPtr::BOTTOM && _depends_only_on_test; }
 };
 
 //------------------------------LoadBNode--------------------------------------
 // Load a byte (8bits signed) from memory
 class LoadBNode : public LoadNode {
 public:
-  LoadBNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
-    : LoadNode(c, mem, adr, at, ti, mo) {}
+  LoadBNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, ti, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -255,8 +281,8 @@
 // Load a unsigned byte (8bits unsigned) from memory
 class LoadUBNode : public LoadNode {
 public:
-  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti, MemOrd mo)
-    : LoadNode(c, mem, adr, at, ti, mo) {}
+  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, ti, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
@@ -269,8 +295,8 @@
 // Load an unsigned short/char (16bits unsigned) from memory
 class LoadUSNode : public LoadNode {
 public:
-  LoadUSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
-    : LoadNode(c, mem, adr, at, ti, mo) {}
+  LoadUSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, ti, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -283,8 +309,8 @@
 // Load a short (16bits signed) from memory
 class LoadSNode : public LoadNode {
 public:
-  LoadSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
-    : LoadNode(c, mem, adr, at, ti, mo) {}
+  LoadSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, ti, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -297,8 +323,8 @@
 // Load an integer from memory
 class LoadINode : public LoadNode {
 public:
-  LoadINode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo)
-    : LoadNode(c, mem, adr, at, ti, mo) {}
+  LoadINode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, ti, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual int store_Opcode() const { return Op_StoreI; }
@@ -330,15 +356,15 @@
 
 public:
   LoadLNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeLong *tl,
-            MemOrd mo, bool require_atomic_access = false)
-    : LoadNode(c, mem, adr, at, tl, mo), _require_atomic_access(require_atomic_access) {}
+            MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, bool require_atomic_access = false)
+    : LoadNode(c, mem, adr, at, tl, mo, control_dependency), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegL; }
   virtual int store_Opcode() const { return Op_StoreL; }
   virtual BasicType memory_type() const { return T_LONG; }
-  bool require_atomic_access() { return _require_atomic_access; }
+  bool require_atomic_access() const { return _require_atomic_access; }
   static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
-                                const Type* rt, MemOrd mo);
+                                const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
     LoadNode::dump_spec(st);
@@ -351,8 +377,8 @@
 // Load a long from unaligned memory
 class LoadL_unalignedNode : public LoadLNode {
 public:
-  LoadL_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, MemOrd mo)
-    : LoadLNode(c, mem, adr, at, TypeLong::LONG, mo) {}
+  LoadL_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadLNode(c, mem, adr, at, TypeLong::LONG, mo, control_dependency) {}
   virtual int Opcode() const;
 };
 
@@ -360,8 +386,8 @@
 // Load a float (64 bits) from memory
 class LoadFNode : public LoadNode {
 public:
-  LoadFNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t, MemOrd mo)
-    : LoadNode(c, mem, adr, at, t, mo) {}
+  LoadFNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, t, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegF; }
   virtual int store_Opcode() const { return Op_StoreF; }
@@ -371,21 +397,39 @@
 //------------------------------LoadDNode--------------------------------------
 // Load a double (64 bits) from memory
 class LoadDNode : public LoadNode {
+  virtual uint hash() const { return LoadNode::hash() + _require_atomic_access; }
+  virtual uint cmp( const Node &n ) const {
+    return _require_atomic_access == ((LoadDNode&)n)._require_atomic_access
+      && LoadNode::cmp(n);
+  }
+  virtual uint size_of() const { return sizeof(*this); }
+  const bool _require_atomic_access;  // is piecewise load forbidden?
+
 public:
-  LoadDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t, MemOrd mo)
-    : LoadNode(c, mem, adr, at, t, mo) {}
+  LoadDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t,
+            MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, bool require_atomic_access = false)
+    : LoadNode(c, mem, adr, at, t, mo, control_dependency), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegD; }
   virtual int store_Opcode() const { return Op_StoreD; }
   virtual BasicType memory_type() const { return T_DOUBLE; }
+  bool require_atomic_access() const { return _require_atomic_access; }
+  static LoadDNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
+                                const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest);
+#ifndef PRODUCT
+  virtual void dump_spec(outputStream *st) const {
+    LoadNode::dump_spec(st);
+    if (_require_atomic_access)  st->print(" Atomic!");
+  }
+#endif
 };
 
 //------------------------------LoadD_unalignedNode----------------------------
 // Load a double from unaligned memory
 class LoadD_unalignedNode : public LoadDNode {
 public:
-  LoadD_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, MemOrd mo)
-    : LoadDNode(c, mem, adr, at, Type::DOUBLE, mo) {}
+  LoadD_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadDNode(c, mem, adr, at, Type::DOUBLE, mo, control_dependency) {}
   virtual int Opcode() const;
 };
 
@@ -393,8 +437,8 @@
 // Load a pointer from memory (either object or array)
 class LoadPNode : public LoadNode {
 public:
-  LoadPNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t, MemOrd mo)
-    : LoadNode(c, mem, adr, at, t, mo) {}
+  LoadPNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, t, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegP; }
   virtual int store_Opcode() const { return Op_StoreP; }
@@ -406,8 +450,8 @@
 // Load a narrow oop from memory (either object or array)
 class LoadNNode : public LoadNode {
 public:
-  LoadNNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const Type* t, MemOrd mo)
-    : LoadNode(c, mem, adr, at, t, mo) {}
+  LoadNNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const Type* t, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, t, mo, control_dependency) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegN; }
   virtual int store_Opcode() const { return Op_StoreN; }
@@ -582,7 +626,7 @@
     : StoreNode(c, mem, adr, at, val, mo), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_LONG; }
-  bool require_atomic_access() { return _require_atomic_access; }
+  bool require_atomic_access() const { return _require_atomic_access; }
   static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
@@ -605,11 +649,28 @@
 //------------------------------StoreDNode-------------------------------------
 // Store double to memory
 class StoreDNode : public StoreNode {
+  virtual uint hash() const { return StoreNode::hash() + _require_atomic_access; }
+  virtual uint cmp( const Node &n ) const {
+    return _require_atomic_access == ((StoreDNode&)n)._require_atomic_access
+      && StoreNode::cmp(n);
+  }
+  virtual uint size_of() const { return sizeof(*this); }
+  const bool _require_atomic_access;  // is piecewise store forbidden?
 public:
-  StoreDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, MemOrd mo)
-    : StoreNode(c, mem, adr, at, val, mo) {}
+  StoreDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val,
+             MemOrd mo, bool require_atomic_access = false)
+    : StoreNode(c, mem, adr, at, val, mo), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_DOUBLE; }
+  bool require_atomic_access() const { return _require_atomic_access; }
+  static StoreDNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo);
+#ifndef PRODUCT
+  virtual void dump_spec(outputStream *st) const {
+    StoreNode::dump_spec(st);
+    if (_require_atomic_access)  st->print(" Atomic!");
+  }
+#endif
+
 };
 
 //------------------------------StorePNode-------------------------------------
--- a/src/share/vm/opto/parse3.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/parse3.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -233,7 +233,7 @@
   // Build the load.
   //
   MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
-  Node* ld = make_load(NULL, adr, type, bt, adr_type, mo, is_vol);
+  Node* ld = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, is_vol);
 
   // Adjust Java stack
   if (type2size[bt] == 1)
--- a/src/share/vm/opto/superword.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/superword.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -1431,7 +1431,7 @@
         }
         Node* adr = low_adr->in(MemNode::Address);
         const TypePtr* atyp = n->adr_type();
-        vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
+        vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
         vlen_in_bytes = vn->as_LoadVector()->memory_size();
       } else if (n->is_Store()) {
         // Promote value to be stored to vector
@@ -2029,6 +2029,19 @@
   return n;
 }
 
+LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
+  LoadNode::ControlDependency dep = LoadNode::DependsOnlyOnTest;
+  for (uint i = 0; i < p->size(); i++) {
+    Node* n = p->at(i);
+    assert(n->is_Load(), "only meaningful for loads");
+    if (!n->depends_only_on_test()) {
+      dep = LoadNode::Pinned;
+    }
+  }
+  return dep;
+}
+
+
 //----------------------------align_initial_loop_index---------------------------
 // Adjust pre-loop limit so that in main loop, a load/store reference
 // to align_to_ref will be a position zero in the vector.
--- a/src/share/vm/opto/superword.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/superword.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -399,6 +399,7 @@
   Node* executed_first(Node_List* p);
   // Return the node executed last in pack p.
   Node* executed_last(Node_List* p);
+  static LoadNode::ControlDependency control_dependency(Node_List* p);
   // Alignment within a vector memory reference
   int memory_alignment(MemNode* s, int iv_adjust);
   // (Start, end] half-open range defining which operands are vector
--- a/src/share/vm/opto/vectornode.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/vectornode.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -403,9 +403,10 @@
 
 // Return the vector version of a scalar load node.
 LoadVectorNode* LoadVectorNode::make(Compile* C, int opc, Node* ctl, Node* mem,
-                                     Node* adr, const TypePtr* atyp, uint vlen, BasicType bt) {
+                                     Node* adr, const TypePtr* atyp, uint vlen, BasicType bt,
+                                     ControlDependency control_dependency) {
   const TypeVect* vt = TypeVect::make(bt, vlen);
-  return new (C) LoadVectorNode(ctl, mem, adr, atyp, vt);
+  return new (C) LoadVectorNode(ctl, mem, adr, atyp, vt, control_dependency);
 }
 
 // Return the vector version of a scalar store node.
--- a/src/share/vm/opto/vectornode.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/opto/vectornode.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -355,8 +355,8 @@
 // Load Vector from memory
 class LoadVectorNode : public LoadNode {
  public:
-  LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt)
-    : LoadNode(c, mem, adr, at, vt, MemNode::unordered) {
+  LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt, ControlDependency control_dependency = LoadNode::DependsOnlyOnTest)
+    : LoadNode(c, mem, adr, at, vt, MemNode::unordered, control_dependency) {
     init_class_id(Class_LoadVector);
   }
 
@@ -372,7 +372,8 @@
   virtual int store_Opcode() const { return Op_StoreVector; }
 
   static LoadVectorNode* make(Compile* C, int opc, Node* ctl, Node* mem,
-                              Node* adr, const TypePtr* atyp, uint vlen, BasicType bt);
+                              Node* adr, const TypePtr* atyp, uint vlen, BasicType bt,
+                              ControlDependency control_dependency = LoadNode::DependsOnlyOnTest);
 };
 
 //------------------------------StoreVectorNode--------------------------------
--- a/src/share/vm/prims/forte.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/prims/forte.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -172,8 +172,27 @@
   // Now do we have a useful PcDesc?
   if (pc_desc == NULL ||
       pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
-    // No debug information available for this pc
-    // vframeStream would explode if we try and walk the frames.
+    // No debug information is available for this PC.
+    //
+    // vframeStreamCommon::fill_from_frame() will decode the frame depending
+    // on the state of the thread.
+    //
+    // Case #1: If the thread is in Java (state == _thread_in_Java), then
+    // the vframeStreamCommon object will be filled as if the frame were a native
+    // compiled frame. Therefore, no debug information is needed.
+    //
+    // Case #2: If the thread is in any other state, then two steps will be performed:
+    // - if asserts are enabled, found_bad_method_frame() will be called and
+    //   the assert in found_bad_method_frame() will be triggered;
+    // - if asserts are disabled, the vframeStreamCommon object will be filled
+    //   as if it were a native compiled frame.
+    //
+    // Case (2) is similar to the way interpreter frames are processed in
+    // vframeStreamCommon::fill_from_interpreter_frame in case no valid BCI
+    // was found for an interpreted frame. If asserts are enabled, the assert
+    // in found_bad_method_frame() will be triggered. If asserts are disabled,
+    // the vframeStreamCommon object will be filled afterwards as if the
+    // interpreter were at the point of entering into the method.
     return false;
   }
 
@@ -230,9 +249,10 @@
     // a valid method. Then again we may have caught an interpreter
     // frame in the middle of construction and the bci field is
     // not yet valid.
-
-    *method_p = method;
     if (!method->is_valid_method()) return false;
+    *method_p = method; // If the Method* found is invalid, it is
+                        // ignored by forte_fill_call_trace_given_top().
+                        // So set method_p only if the Method is valid.
 
     intptr_t bcx = fr->interpreter_frame_bcx();
 
@@ -247,18 +267,33 @@
 }
 
 
-// Determine if 'fr' can be used to find an initial Java frame.
-// Return false if it can not find a fully decipherable Java frame
-// (in other words a frame that isn't safe to use in a vframe stream).
-// Obviously if it can't even find a Java frame false will also be returned.
+// Determine if a Java frame can be found starting with the frame 'fr'.
+//
+// Check the return value of find_initial_Java_frame and the value of
+// 'method_p' to decide on how use the results returned by this method.
+//
+// If 'method_p' is not NULL, an initial Java frame has been found and
+// the stack can be walked starting from that initial frame. In this case,
+// 'method_p' points to the Method that the initial frame belongs to and
+// the initial Java frame is returned in initial_frame_p.
+//
+// find_initial_Java_frame() returns true if a Method has been found (i.e.,
+// 'method_p' is not NULL) and the initial frame that belongs to that Method
+// is decipherable.
 //
-// If we find a Java frame decipherable or not then by definition we have
-// identified a method and that will be returned to the caller via method_p.
-// If we can determine a bci that is returned also. (Hmm is it possible
-// to return a method and bci and still return false? )
+// A frame is considered to be decipherable:
+//
+// - if the frame is a compiled frame and a PCDesc is available;
+//
+// - if the frame is an interpreter frame that is valid or the thread is
+//   state (_thread_in_native || state == _thread_in_vm || state == _thread_blocked).
 //
-// The initial Java frame we find (if any) is return via initial_frame_p.
+// Note that find_initial_Java_frame() can return false even if an initial
+// Java method was found (e.g., there is no PCDesc available for the method).
 //
+// If 'method_p' is NULL, it was not possible to find a Java frame when
+// walking the stack starting from 'fr'. In this case find_initial_Java_frame
+// returns false.
 
 static bool find_initial_Java_frame(JavaThread* thread,
                                     frame* fr,
@@ -278,8 +313,6 @@
   // recognizable to us. This should only happen if we are in a JRT_LEAF
   // or something called by a JRT_LEAF method.
 
-
-
   frame candidate = *fr;
 
   // If the starting frame we were given has no codeBlob associated with
@@ -334,9 +367,11 @@
       nmethod* nm = (nmethod*) candidate.cb();
       *method_p = nm->method();
 
-      // If the frame isn't fully decipherable then the default
-      // value for the bci is a signal that we don't have a bci.
-      // If we have a decipherable frame this bci value will
+      // If the frame is not decipherable, then the value of -1
+      // for the BCI is used to signal that no BCI is available.
+      // Furthermore, the method returns false in this case.
+      //
+      // If a decipherable frame is available, the BCI value will
       // not be used.
 
       *bci_p = -1;
@@ -347,9 +382,9 @@
 
       if (nm->is_native_method()) return true;
 
-      // If it isn't decipherable then we have found a pc that doesn't
-      // have a PCDesc that can get us a bci however we did find
-      // a method
+      // If the frame is not decipherable, then a PC was found
+      // that does not have a PCDesc from which a BCI can be obtained.
+      // Nevertheless, a Method was found.
 
       if (!is_decipherable_compiled_frame(thread, &candidate, nm)) {
         return false;
@@ -358,7 +393,7 @@
       // is_decipherable_compiled_frame may modify candidate's pc
       *initial_frame_p = candidate;
 
-      assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid");
+      assert(nm->pc_desc_at(candidate.pc()) != NULL, "debug information must be available if the frame is decipherable");
 
       return true;
     }
@@ -388,17 +423,17 @@
 
   frame initial_Java_frame;
   Method* method;
-  int bci;
+  int bci = -1; // assume BCI is not available for method
+                // update with correct information if available
   int count;
 
   count = 0;
   assert(trace->frames != NULL, "trace->frames must be non-NULL");
 
-  bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
+  // Walk the stack starting from 'top_frame' and search for an initial Java frame.
+  find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
 
-  // The frame might not be walkable but still recovered a method
-  // (e.g. an nmethod with no scope info for the pc)
-
+  // Check if a Java Method has been found.
   if (method == NULL) return;
 
   if (!method->is_valid_method()) {
@@ -406,29 +441,6 @@
     return;
   }
 
-  // We got a Java frame however it isn't fully decipherable
-  // so it won't necessarily be safe to use it for the
-  // initial frame in the vframe stream.
-
-  if (!fully_decipherable) {
-    // Take whatever method the top-frame decoder managed to scrape up.
-    // We look further at the top frame only if non-safepoint
-    // debugging information is available.
-    count++;
-    trace->num_frames = count;
-    trace->frames[0].method_id = method->find_jmethod_id_or_null();
-    if (!method->is_native()) {
-      trace->frames[0].lineno = bci;
-    } else {
-      trace->frames[0].lineno = -3;
-    }
-
-    if (!initial_Java_frame.safe_for_sender(thd)) return;
-
-    RegisterMap map(thd, false);
-    initial_Java_frame = initial_Java_frame.sender(&map);
-  }
-
   vframeStreamForte st(thd, initial_Java_frame, false);
 
   for (; !st.at_end() && count < depth; st.forte_next(), count++) {
--- a/src/share/vm/runtime/globals.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/runtime/globals.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -3940,7 +3940,11 @@
           "Enable event-based tracing")                                     \
                                                                             \
   product(bool, UseLockedTracing, false,                                    \
-          "Use locked-tracing when doing event-based tracing")
+          "Use locked-tracing when doing event-based tracing")              \
+                                                                            \
+  product_pd(bool, PreserveFramePointer,                                    \
+             "Use the FP register for holding the frame pointer "           \
+             "and not as a general purpose register.")
 
 /*
  *  Macros for factoring of globals
--- a/src/share/vm/runtime/sharedRuntime.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -1230,7 +1230,7 @@
 #endif
 
   // JSR 292 key invariant:
-  // If the resolved method is a MethodHandle invoke target the call
+  // If the resolved method is a MethodHandle invoke target, the call
   // site must be a MethodHandle call site, because the lambda form might tail-call
   // leaving the stack in a state unknown to either caller or callee
   // TODO detune for now but we might need it again
--- a/src/share/vm/runtime/vframe.hpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/runtime/vframe.hpp	Wed Jun 10 23:13:44 2015 -0700
@@ -389,12 +389,12 @@
       decode_offset < 0 ||
       decode_offset >= nm()->scopes_data_size()) {
     // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
-    // If we attempt to read nmethod::scopes_data at serialized_null (== 0),
-    // or if we read some at other crazy offset,
-    // we will decode garbage and make wild references into the heap,
-    // leading to crashes in product mode.
-    // (This isn't airtight, of course, since there are internal
-    // offsets which are also crazy.)
+    // If we read nmethod::scopes_data at serialized_null (== 0)
+    // or if read some at other invalid offset, invalid values will be decoded.
+    // Based on these values, invalid heap locations could be referenced
+    // that could lead to crashes in product mode.
+    // Therefore, do not use the decode offset if invalid, but fill the frame
+    // as it were a native compiled frame (no Java-level assumptions).
 #ifdef ASSERT
     if (WizardMode) {
       tty->print_cr("Error in fill_from_frame: pc_desc for "
@@ -514,9 +514,15 @@
   intptr_t  bcx    = _frame.interpreter_frame_bcx();
   int       bci    = method->validate_bci_from_bcx(bcx);
   // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
+  // AsyncGetCallTrace interrupts the VM asynchronously. As a result
+  // it is possible to access an interpreter frame for which
+  // no Java-level information is yet available (e.g., becasue
+  // the frame was being created when the VM interrupted it).
+  // In this scenario, pretend that the interpreter is at the point
+  // of entering the method.
   if (bci < 0) {
     found_bad_method_frame();
-    bci = 0;  // pretend it's on the point of entering
+    bci = 0;
   }
   _mode   = interpreted_mode;
   _method = method;
--- a/src/share/vm/utilities/elfFile.cpp	Mon Jun 08 12:06:37 2015 -0700
+++ b/src/share/vm/utilities/elfFile.cpp	Wed Jun 10 23:13:44 2015 -0700
@@ -261,7 +261,12 @@
       }
     }
   }
+// AARCH64 defaults to noexecstack. All others default to execstack.
+#ifdef AARCH64
+  return true;
+#else
   return false;
+#endif
 }
 #endif
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/unsafe/TestUnsafeLoadControl.java	Wed Jun 10 23:13:44 2015 -0700
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8077504
+ * @summary Unsafe load can loose control dependency and cause crash
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestUnsafeLoadControl
+ *
+ */
+
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+public class TestUnsafeLoadControl {
+
+    private static final Unsafe UNSAFE;
+
+    static {
+        try {
+            Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+            unsafeField.setAccessible(true);
+            UNSAFE = (Unsafe) unsafeField.get(null);
+        } catch(Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    static int val;
+    static void test1(int[] a, boolean[] flags, boolean flag, long j) {
+        for (int i = 0; i < 10; i++) {
+            if (flags[i]) {
+                if (flag) {
+                    long address = (j << 2) + UNSAFE.ARRAY_INT_BASE_OFFSET;
+                    int v = UNSAFE.getInt(a, address);
+                    val = v;
+                }
+            }
+        }
+    }
+
+    static int test2(int[] a, boolean[] flags, boolean flag, long j) {
+        int sum = 0;
+        for (int i = 0; i < 10; i++) {
+            if (flags[i]) {
+                if (flag) {
+                    long address = (j << 2) + UNSAFE.ARRAY_INT_BASE_OFFSET;
+                    int v = UNSAFE.getInt(a, address);
+                    if (v == 0) {
+                        sum++;
+                    }
+                }
+            }
+        }
+        return sum;
+    }
+
+    static public void main(String[] args) {
+        boolean[] flags = new boolean[10];
+        for (int i = 0; i < flags.length; i++) {
+            flags[i] = true;
+        }
+        int[] array = new int[10];
+        for (int i = 0; i < 20000; i++) {
+            test1(array, flags, true, 0);
+        }
+        for (int i = 0; i < flags.length; i++) {
+            flags[i] = false;
+        }
+        test1(array, flags, true, Long.MAX_VALUE/4);
+
+        for (int i = 0; i < flags.length; i++) {
+            flags[i] = true;
+        }
+        for (int i = 0; i < 20000; i++) {
+            test2(array, flags, true, 0);
+        }
+        for (int i = 0; i < flags.length; i++) {
+            flags[i] = false;
+        }
+        test2(array, flags, true, Long.MAX_VALUE/4);
+    }
+}