changeset 420:a1980da045cc

6462850: generate biased locking code in C2 ideal graph Summary: Inline biased locking code in C2 ideal graph during macro nodes expansion Reviewed-by: never
author kvn
date Fri, 07 Nov 2008 09:29:38 -0800
parents 0bf25c4807f9
children 577f3a2e0662
files src/cpu/sparc/vm/assembler_sparc.cpp src/cpu/sparc/vm/assembler_sparc.hpp src/cpu/sparc/vm/sparc.ad src/cpu/x86/vm/assembler_x86.cpp src/cpu/x86/vm/assembler_x86.hpp src/cpu/x86/vm/x86_32.ad src/cpu/x86/vm/x86_64.ad src/os_cpu/linux_x86/vm/linux_x86_32.ad src/os_cpu/solaris_x86/vm/solaris_x86_32.ad src/share/vm/adlc/formssel.cpp src/share/vm/opto/c2_globals.hpp src/share/vm/opto/callnode.cpp src/share/vm/opto/callnode.hpp src/share/vm/opto/classes.hpp src/share/vm/opto/compile.cpp src/share/vm/opto/library_call.cpp src/share/vm/opto/loopTransform.cpp src/share/vm/opto/macro.cpp src/share/vm/opto/macro.hpp src/share/vm/opto/matcher.cpp src/share/vm/opto/memnode.cpp src/share/vm/opto/memnode.hpp src/share/vm/opto/type.hpp src/share/vm/runtime/arguments.cpp src/share/vm/utilities/vmError.cpp
diffstat 25 files changed, 477 insertions(+), 153 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -2615,7 +2615,8 @@
   }
 }
 
-void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg,
+void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
+                                          Register temp_reg,
                                           Label& done, Label* slow_case,
                                           BiasedLockingCounters* counters) {
   assert(UseBiasedLocking, "why call this otherwise?");
@@ -2691,8 +2692,7 @@
                   markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
                   mark_reg);
   or3(G2_thread, mark_reg, temp_reg);
-  casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
-                  (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+  casn(mark_addr.base(), mark_reg, temp_reg);
   // If the biasing toward our thread failed, this means that
   // another thread succeeded in biasing it toward itself and we
   // need to revoke that bias. The revocation will occur in the
@@ -2721,8 +2721,7 @@
   load_klass(obj_reg, temp_reg);
   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
   or3(G2_thread, temp_reg, temp_reg);
-  casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
-                  (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+  casn(mark_addr.base(), mark_reg, temp_reg);
   // If the biasing toward our thread failed, this means that
   // another thread succeeded in biasing it toward itself and we
   // need to revoke that bias. The revocation will occur in the
@@ -2752,8 +2751,7 @@
   // bits in this situation. Should attempt to preserve them.
   load_klass(obj_reg, temp_reg);
   ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
-  casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
-                  (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+  casn(mark_addr.base(), mark_reg, temp_reg);
   // Fall through to the normal CAS-based lock, because no matter what
   // the result of the above CAS, some thread must have succeeded in
   // removing the bias bit from the object's header.
@@ -2815,8 +2813,10 @@
 // effect).
 
 
-void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch,
-                                          BiasedLockingCounters* counters) {
+void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
+                                          Register Rbox, Register Rscratch,
+                                          BiasedLockingCounters* counters,
+                                          bool try_bias) {
    Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes());
 
    verify_oop(Roop);
@@ -2838,7 +2838,7 @@
      // Fetch object's markword
      ld_ptr(mark_addr, Rmark);
 
-     if (UseBiasedLocking) {
+     if (try_bias) {
         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
      }
 
@@ -2881,7 +2881,7 @@
 
       ld_ptr (mark_addr, Rmark);           // fetch obj->mark
       // Triage: biased, stack-locked, neutral, inflated
-      if (UseBiasedLocking) {
+      if (try_bias) {
         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
         // Invariant: if control reaches this point in the emitted stream
         // then Rmark has not been modified.
@@ -2945,7 +2945,7 @@
       ld_ptr (mark_addr, Rmark);           // fetch obj->mark
       // Triage: biased, stack-locked, neutral, inflated
 
-      if (UseBiasedLocking) {
+      if (try_bias) {
         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
         // Invariant: if control reaches this point in the emitted stream
         // then Rmark has not been modified.
@@ -3039,7 +3039,9 @@
    bind   (done) ;
 }
 
-void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch) {
+void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
+                                            Register Rbox, Register Rscratch,
+                                            bool try_bias) {
    Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes());
 
    Label done ;
@@ -3050,7 +3052,7 @@
    }
 
    if (EmitSync & 8) {
-     if (UseBiasedLocking) {
+     if (try_bias) {
         biased_locking_exit(mark_addr, Rscratch, done);
      }
 
@@ -3077,7 +3079,7 @@
    // I$ effects.
    Label LStacked ;
 
-   if (UseBiasedLocking) {
+   if (try_bias) {
       // TODO: eliminate redundant LDs of obj->mark
       biased_locking_exit(mark_addr, Rscratch, done);
    }
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -2220,9 +2220,13 @@
 
   // These set the icc condition code to equal if the lock succeeded
   // and notEqual if it failed and requires a slow case
-  void compiler_lock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch,
-                              BiasedLockingCounters* counters = NULL);
-  void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch);
+  void compiler_lock_object(Register Roop, Register Rmark, Register Rbox,
+                            Register Rscratch,
+                            BiasedLockingCounters* counters = NULL,
+                            bool try_bias = UseBiasedLocking);
+  void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox,
+                              Register Rscratch,
+                              bool try_bias = UseBiasedLocking);
 
   // Biased locking support
   // Upon entry, lock_reg must point to the lock record on the stack,
--- a/src/cpu/sparc/vm/sparc.ad	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Nov 07 09:29:38 2008 -0800
@@ -395,6 +395,7 @@
                   );
 
 reg_class g1_regL(R_G1H,R_G1);
+reg_class g3_regL(R_G3H,R_G3);
 reg_class o2_regL(R_O2H,R_O2);
 reg_class o7_regL(R_O7H,R_O7);
 
@@ -2688,7 +2689,7 @@
     assert(Rbox  != Rscratch, "");
     assert(Rbox  != Rmark, "");
 
-    __ compiler_lock_object(Roop, Rmark, Rbox, Rscratch, _counters);
+    __ compiler_lock_object(Roop, Rmark, Rbox, Rscratch, _counters, UseBiasedLocking && !UseOptoBiasInlining);
 %}
 
 enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
@@ -2704,7 +2705,7 @@
     assert(Rbox  != Rscratch, "");
     assert(Rbox  != Rmark, "");
 
-    __ compiler_unlock_object(Roop, Rmark, Rbox, Rscratch);
+    __ compiler_unlock_object(Roop, Rmark, Rbox, Rscratch, UseBiasedLocking && !UseOptoBiasInlining);
   %}
 
   enc_class enc_cas( iRegP mem, iRegP old, iRegP new ) %{
@@ -2716,8 +2717,7 @@
     // casx_under_lock picks 1 of 3 encodings:
     // For 32-bit pointers you get a 32-bit CAS
     // For 64-bit pointers you get a 64-bit CASX
-    __ casx_under_lock(Rmem, Rold, Rnew, // Swap(*Rmem,Rnew) if *Rmem == Rold
-                        (address) StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+    __ casn(Rmem, Rold, Rnew); // Swap(*Rmem,Rnew) if *Rmem == Rold
     __ cmp( Rold, Rnew );
   %}
 
@@ -3766,6 +3766,14 @@
   interface(REG_INTER);
 %}
 
+operand g3RegL() %{
+  constraint(ALLOC_IN_RC(g3_regL));
+  match(iRegL);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Int Register safe
 // This is 64bit safe
 operand iRegIsafe() %{
@@ -6602,32 +6610,23 @@
   ins_pipe( long_memory_op );
 %}
 
-instruct storeLConditional_bool(iRegP mem_ptr, iRegL oldval, iRegL newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
-  match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
-  effect( USE mem_ptr, KILL ccr, KILL tmp1);
-  // Marshal the register pairs into V9 64-bit registers, then do the compare-and-swap
-  format %{
-            "MOV    $newval,R_O7\n\t"
-            "CASXA  [$mem_ptr],$oldval,R_O7\t! If $oldval==[$mem_ptr] Then store R_O7 into [$mem_ptr], set R_O7=[$mem_ptr] in any case\n\t"
-            "CMP    $oldval,R_O7\t\t! See if we made progress\n\t"
-            "MOV    1,$res\n\t"
-            "MOVne  xcc,R_G0,$res"
-  %}
-  ins_encode( enc_casx(mem_ptr, oldval, newval),
-              enc_lflags_ne_to_boolean(res) );
+// Conditional-store of an int value.
+instruct storeIConditional( iRegP mem_ptr, iRegI oldval, g3RegI newval, flagsReg icc ) %{
+  match(Set icc (StoreIConditional mem_ptr (Binary oldval newval)));
+  effect( KILL newval );
+  format %{ "CASA   [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t"
+            "CMP    $oldval,$newval\t\t! See if we made progress"  %}
+  ins_encode( enc_cas(mem_ptr,oldval,newval) );
   ins_pipe( long_memory_op );
 %}
 
-instruct storeLConditional_flags(iRegP mem_ptr, iRegL oldval, iRegL newval, flagsRegL xcc, o7RegI tmp1, immI0 zero) %{
-  match(Set xcc (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
-  effect( USE mem_ptr, KILL tmp1);
-  // Marshal the register pairs into V9 64-bit registers, then do the compare-and-swap
-  format %{
-            "MOV    $newval,R_O7\n\t"
-            "CASXA  [$mem_ptr],$oldval,R_O7\t! If $oldval==[$mem_ptr] Then store R_O7 into [$mem_ptr], set R_O7=[$mem_ptr] in any case\n\t"
-            "CMP    $oldval,R_O7\t\t! See if we made progress"
-  %}
-  ins_encode( enc_casx(mem_ptr, oldval, newval));
+// Conditional-store of a long value.
+instruct storeLConditional( iRegP mem_ptr, iRegL oldval, g3RegL newval, flagsRegL xcc ) %{
+  match(Set xcc (StoreLConditional mem_ptr (Binary oldval newval)));
+  effect( KILL newval );
+  format %{ "CASXA  [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t"
+            "CMP    $oldval,$newval\t\t! See if we made progress"  %}
+  ins_encode( enc_cas(mem_ptr,oldval,newval) );
   ins_pipe( long_memory_op );
 %}
 
@@ -7410,6 +7409,34 @@
   ins_pipe(ialu_reg_imm);
 %}
 
+#ifndef _LP64
+
+// Use sp_ptr_RegP to match G2 (TLS register) without spilling.
+instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{
+  match(Set dst (OrI src1 (CastP2X src2)));
+
+  size(4);
+  format %{ "OR     $src1,$src2,$dst" %}
+  opcode(Assembler::or_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+#else
+
+instruct orL_reg_castP2X(iRegL dst, iRegL src1, sp_ptr_RegP src2) %{
+  match(Set dst (OrL src1 (CastP2X src2)));
+
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "OR     $src1,$src2,$dst\t! long" %}
+  opcode(Assembler::or_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
+  ins_pipe(ialu_reg_reg);
+%}
+
+#endif
+
 // Xor Instructions
 // Register Xor
 instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -621,6 +621,10 @@
     debug_only(has_disp32 = true);
     break;
 
+  case 0xF0:                    // Lock
+    assert(os::is_MP(), "only on MP");
+    goto again_after_prefix;
+
   case 0xF3:                    // For SSE
   case 0xF2:                    // For SSE2
     switch (0xFF & *ip++) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -1780,7 +1780,8 @@
   // check info (currently consumed only by C1). If
   // swap_reg_contains_mark is true then returns -1 as it is assumed
   // the calling code has already passed any potential faults.
-  int biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, Register tmp_reg,
+  int biased_locking_enter(Register lock_reg, Register obj_reg,
+                           Register swap_reg, Register tmp_reg,
                            bool swap_reg_contains_mark,
                            Label& done, Label* slow_case = NULL,
                            BiasedLockingCounters* counters = NULL);
--- a/src/cpu/x86/vm/x86_32.ad	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Nov 07 09:29:38 2008 -0800
@@ -3313,7 +3313,7 @@
       // Beware -- there's a subtle invariant that fetch of the markword
       // at [FETCH], below, will never observe a biased encoding (*101b).
       // If this invariant is not held we risk exclusion (safety) failure.
-      if (UseBiasedLocking) {
+      if (UseBiasedLocking && !UseOptoBiasInlining) {
         masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
       }
 
@@ -3534,7 +3534,7 @@
 
       // Critically, the biased locking test must have precedence over
       // and appear before the (box->dhw == 0) recursive stack-lock test.
-      if (UseBiasedLocking) {
+      if (UseBiasedLocking && !UseOptoBiasInlining) {
          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       }
       
@@ -7930,33 +7930,36 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
-// Conditional-store of a long value
-// Returns a boolean value (0/1) on success.  Implemented with a CMPXCHG8 on Intel.
-// mem_ptr can actually be in either ESI or EDI
-instruct storeLConditional( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
-  match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
-  effect(KILL cr);
-  // EDX:EAX is killed if there is contention, but then it's also unused.
-  // In the common case of no contention, EDX:EAX holds the new oop address.
-  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
-            "MOV    $res,0\n\t"
-            "JNE,s  fail\n\t"
-            "MOV    $res,1\n"
-          "fail:" %}
-  ins_encode( enc_cmpxchg8(mem_ptr),
-              enc_flags_ne_to_boolean(res) );
+// Conditional-store of an int value.
+// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
+instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
+  match(Set cr (StoreIConditional mem (Binary oldval newval)));
+  effect(KILL oldval);
+  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
+  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
   ins_pipe( pipe_cmpxchg );
 %}
 
-// Conditional-store of a long value
-// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
-// mem_ptr can actually be in either ESI or EDI
-instruct storeLConditional_flags( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr, immI0 zero ) %{
-  match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
-  // EDX:EAX is killed if there is contention, but then it's also unused.
-  // In the common case of no contention, EDX:EAX holds the new oop address.
-  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
-  ins_encode( enc_cmpxchg8(mem_ptr) );
+// Conditional-store of a long value.
+// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
+instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+  match(Set cr (StoreLConditional mem (Binary oldval newval)));
+  effect(KILL oldval);
+  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
+            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
+            "XCHG   EBX,ECX"
+  %}
+  ins_encode %{
+    // Note: we need to swap rbx, and rcx before and after the
+    //       cmpxchg8 instruction because the instruction uses
+    //       rcx as the high order word of the new value to store but
+    //       our register encoding uses rbx.
+    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
+    if( os::is_MP() )
+      __ lock();
+    __ cmpxchg8(Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp));
+    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
+  %}
   ins_pipe( pipe_cmpxchg );
 %}
 
@@ -8423,6 +8426,7 @@
   ins_pipe( ialu_reg );
 %}
 
+
 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
 // This idiom is used by the compiler for the i2b bytecode.
 instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour, eFlagsReg cr) %{
@@ -8540,6 +8544,18 @@
   ins_pipe( ialu_reg_reg );
 %}
 
+instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
+  match(Set dst (OrI dst (CastP2X src)));
+  effect(KILL cr);
+
+  size(2);
+  format %{ "OR     $dst,$src" %}
+  opcode(0x0B);
+  ins_encode( OpcP, RegReg( dst, src) );
+  ins_pipe( ialu_reg_reg );
+%}
+
+
 // Or Register with Immediate
 instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (OrI dst src));
--- a/src/cpu/x86/vm/x86_64.ad	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Nov 07 09:29:38 2008 -0800
@@ -3572,7 +3572,7 @@
         // at [FETCH], below, will never observe a biased encoding (*101b).
         // If this invariant is not held we'll suffer exclusion (safety) failure.
 
-        if (UseBiasedLocking) {
+        if (UseBiasedLocking && !UseOptoBiasInlining) {
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
           masm.movptr(tmpReg, Address(objReg, 0)) ;        // [FETCH]
         }
@@ -3660,7 +3660,7 @@
     } else {
        Label DONE_LABEL, Stacked, CheckSucc ;
 
-       if (UseBiasedLocking) {
+       if (UseBiasedLocking && !UseOptoBiasInlining) {
          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
        }
         
@@ -7845,7 +7845,7 @@
                            rFlagsReg cr)
 %{
   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
-
+ 
   format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) "
             "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %}
   opcode(0x0F, 0xB1);
@@ -7856,53 +7856,40 @@
   ins_pipe(pipe_cmpxchg);
 %}
 
-// Conditional-store of a long value
-// Returns a boolean value (0/1) on success.  Implemented with a
-// CMPXCHG8 on Intel.  mem_ptr can actually be in either RSI or RDI
-
-instruct storeLConditional(rRegI res,
-                           memory mem_ptr,
-                           rax_RegL oldval, rRegL newval,
-                           rFlagsReg cr)
-%{
-  match(Set res (StoreLConditional mem_ptr (Binary oldval newval)));
-  effect(KILL cr);
-
-  format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
-            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
-            "sete    $res\n\t"
-            "movzbl  $res, $res" %}
+// Conditional-store of an int value.
+// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
+instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr)
+%{
+  match(Set cr (StoreIConditional mem (Binary oldval newval)));
+  effect(KILL oldval);
+
+  format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
   opcode(0x0F, 0xB1);
   ins_encode(lock_prefix,
-             REX_reg_mem_wide(newval, mem_ptr),
+             REX_reg_mem(newval, mem),
              OpcP, OpcS,
-             reg_mem(newval, mem_ptr),
-             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
-             REX_reg_breg(res, res), // movzbl
-             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
+             reg_mem(newval, mem));
   ins_pipe(pipe_cmpxchg);
 %}
 
-// Conditional-store of a long value
-// ZF flag is set on success, reset otherwise. Implemented with a
-// CMPXCHG8 on Intel.  mem_ptr can actually be in either RSI or RDI
-instruct storeLConditional_flags(memory mem_ptr,
-                                 rax_RegL oldval, rRegL newval,
-                                 rFlagsReg cr,
-                                 immI0 zero)
-%{
-  match(Set cr (CmpI (StoreLConditional mem_ptr (Binary oldval newval)) zero));
-
-  format %{ "cmpxchgq $mem_ptr, $newval\t# (long) "
-            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
+// Conditional-store of a long value.
+// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG.
+instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr)
+%{
+  match(Set cr (StoreLConditional mem (Binary oldval newval)));
+  effect(KILL oldval);
+
+  format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %}
   opcode(0x0F, 0xB1);
   ins_encode(lock_prefix,
-             REX_reg_mem_wide(newval, mem_ptr),
+             REX_reg_mem_wide(newval, mem),
              OpcP, OpcS,
-             reg_mem(newval, mem_ptr));
+             reg_mem(newval, mem));
   ins_pipe(pipe_cmpxchg);
 %}
 
+
+// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 instruct compareAndSwapP(rRegI res,
                          memory mem_ptr,
                          rax_RegP oldval, rRegP newval,
@@ -7926,7 +7913,6 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
-// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 instruct compareAndSwapL(rRegI res,
                          memory mem_ptr,
                          rax_RegL oldval, rRegL newval,
@@ -8876,6 +8862,7 @@
   ins_pipe(ialu_reg);
 %}
 
+
 // Logical Shift Right by 8-bit immediate
 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 %{
@@ -9585,6 +9572,18 @@
   ins_pipe(ialu_reg_reg);
 %}
 
+// Use any_RegP to match R15 (TLS register) without spilling.
+instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
+  match(Set dst (OrL dst (CastP2X src)));
+  effect(KILL cr);
+
+  format %{ "orq     $dst, $src\t# long" %}
+  opcode(0x0B);
+  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
+  ins_pipe(ialu_reg_reg);
+%}
+
+
 // Or Register with Immediate
 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 %{
--- a/src/os_cpu/linux_x86/vm/linux_x86_32.ad	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/os_cpu/linux_x86/vm/linux_x86_32.ad	Fri Nov 07 09:29:38 2008 -0800
@@ -103,16 +103,16 @@
 // This name is KNOWN by the ADLC and cannot be changed.
 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
 // for this guy.
-instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{
+instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
   match(Set dst (ThreadLocal));
   effect(DEF dst, KILL cr);
 
-  format %{ "MOV    EAX, Thread::current()" %}
+  format %{ "MOV    $dst, Thread::current()" %}
   ins_encode( linux_tlsencode(dst) );
   ins_pipe( ialu_reg_fat );
 %}
 
-instruct TLS(eAXRegP dst) %{
+instruct TLS(eRegP dst) %{
   match(Set dst (ThreadLocal));
 
   expand %{
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad	Fri Nov 07 09:29:38 2008 -0800
@@ -110,16 +110,16 @@
 // This name is KNOWN by the ADLC and cannot be changed.
 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
 // for this guy.
-instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{
+instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
   match(Set dst (ThreadLocal));
   effect(DEF dst, KILL cr);
 
-  format %{ "MOV    EAX, Thread::current()" %}
+  format %{ "MOV    $dst, Thread::current()" %}
   ins_encode( solaris_tlsencode(dst) );
   ins_pipe( ialu_reg_fat );
 %}
 
-instruct TLS(eAXRegP dst) %{
+instruct TLS(eRegP dst) %{
   match(Set dst (ThreadLocal));
 
   expand %{
--- a/src/share/vm/adlc/formssel.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/adlc/formssel.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -3324,7 +3324,7 @@
     "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S",
     "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
     "LoadPLocked", "LoadLLocked",
-    "StorePConditional", "StoreLConditional",
+    "StorePConditional", "StoreIConditional", "StoreLConditional",
     "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN",
     "StoreCM",
     "ClearArray"
--- a/src/share/vm/opto/c2_globals.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -388,6 +388,9 @@
   product(intx, EliminateAllocationArraySizeLimit, 64,                      \
           "Array size (number of elements) limit for scalar replacement")   \
                                                                             \
+  product(bool, UseOptoBiasInlining, true,                                 \
+          "Generate biased locking code in C2 ideal graph")                 \
+                                                                            \
   product(intx, ValueSearchLimit, 1000,                                     \
           "Recursion limit in PhaseMacroExpand::value_from_mem_phi")        \
                                                                             \
--- a/src/share/vm/opto/callnode.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/callnode.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -967,6 +967,7 @@
   init_class_id(Class_SafePointScalarObject);
 }
 
+bool SafePointScalarObjectNode::pinned() const { return true; }
 
 uint SafePointScalarObjectNode::ideal_reg() const {
   return 0; // No matching to machine instruction
--- a/src/share/vm/opto/callnode.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/callnode.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -433,6 +433,10 @@
   uint n_fields()    const { return _n_fields; }
   DEBUG_ONLY(AllocateNode* alloc() const { return _alloc; })
 
+  // SafePointScalarObject should be always pinned to the control edge
+  // of the SafePoint node for which it was generated.
+  virtual bool pinned() const; // { return true; }
+
   virtual uint size_of() const { return sizeof(*this); }
 
   // Assumes that "this" is an argument to a safepoint node "s", and that
--- a/src/share/vm/opto/classes.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/classes.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -205,6 +205,7 @@
 macro(StoreC)
 macro(StoreCM)
 macro(StorePConditional)
+macro(StoreIConditional)
 macro(StoreLConditional)
 macro(StoreD)
 macro(StoreF)
--- a/src/share/vm/opto/compile.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/compile.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -2001,6 +2001,7 @@
   case Op_StorePConditional:
   case Op_StoreI:
   case Op_StoreL:
+  case Op_StoreIConditional:
   case Op_StoreLConditional:
   case Op_CompareAndSwapI:
   case Op_CompareAndSwapL:
--- a/src/share/vm/opto/library_call.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/library_call.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -3485,11 +3485,32 @@
   const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
   int alias_idx = C->get_alias_index(adr_type);
 
-  Node *result = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal));
-  Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(result));
+  Node *cas = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal));
+  Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(cas));
   set_memory(store_proj, alias_idx);
-
-  push(result);
+  Node *bol = _gvn.transform( new (C, 2) BoolNode( cas, BoolTest::eq ) );
+
+  Node *result;
+  // CMove node is not used to be able fold a possible check code
+  // after attemptUpdate() call. This code could be transformed
+  // into CMove node by loop optimizations.
+  {
+    RegionNode *r = new (C, 3) RegionNode(3);
+    result = new (C, 3) PhiNode(r, TypeInt::BOOL);
+
+    Node *iff = create_and_xform_if(control(), bol, PROB_FAIR, COUNT_UNKNOWN);
+    Node *iftrue = opt_iff(r, iff);
+    r->init_req(1, iftrue);
+    result->init_req(1, intcon(1));
+    result->init_req(2, intcon(0));
+
+    set_control(_gvn.transform(r));
+    record_for_igvn(r);
+
+    C->set_has_split_ifs(true); // Has chance for split-if optimization
+  }
+
+  push(_gvn.transform(result));
   return true;
 }
 
--- a/src/share/vm/opto/loopTransform.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/loopTransform.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -1519,6 +1519,7 @@
         Node *bol = iff->in(1);
         if( bol && bol->req() > 1 && bol->in(1) &&
             ((bol->in(1)->Opcode() == Op_StorePConditional ) ||
+             (bol->in(1)->Opcode() == Op_StoreIConditional ) ||
              (bol->in(1)->Opcode() == Op_StoreLConditional ) ||
              (bol->in(1)->Opcode() == Op_CompareAndSwapI ) ||
              (bol->in(1)->Opcode() == Op_CompareAndSwapL ) ||
--- a/src/share/vm/opto/macro.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/macro.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -82,16 +82,31 @@
   }
 }
 
-Node* PhaseMacroExpand::opt_iff(Node* region, Node* iff) {
-  IfNode *opt_iff = transform_later(iff)->as_If();
+Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) {
+  Node* cmp;
+  if (mask != 0) {
+    Node* and_node = transform_later(new (C, 3) AndXNode(word, MakeConX(mask)));
+    cmp = transform_later(new (C, 3) CmpXNode(and_node, MakeConX(bits)));
+  } else {
+    cmp = word;
+  }
+  Node* bol = transform_later(new (C, 2) BoolNode(cmp, BoolTest::ne));
+  IfNode* iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
+  transform_later(iff);
 
-  // Fast path taken; set region slot 2
-  Node *fast_taken = transform_later( new (C, 1) IfFalseNode(opt_iff) );
-  region->init_req(2,fast_taken); // Capture fast-control
+  // Fast path taken.
+  Node *fast_taken = transform_later( new (C, 1) IfFalseNode(iff) );
 
   // Fast path not-taken, i.e. slow path
-  Node *slow_taken = transform_later( new (C, 1) IfTrueNode(opt_iff) );
-  return slow_taken;
+  Node *slow_taken = transform_later( new (C, 1) IfTrueNode(iff) );
+
+  if (return_fast_path) {
+    region->init_req(edge, slow_taken); // Capture slow-control
+    return fast_taken;
+  } else {
+    region->init_req(edge, fast_taken); // Capture fast-control
+    return slow_taken;
+  }
 }
 
 //--------------------copy_predefined_input_for_runtime_call--------------------
@@ -854,7 +869,7 @@
 
 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
   Node* adr = basic_plus_adr(base, offset);
-  const TypePtr* adr_type = TypeRawPtr::BOTTOM;
+  const TypePtr* adr_type = adr->bottom_type()->is_ptr();
   Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt);
   transform_later(value);
   return value;
@@ -1583,12 +1598,194 @@
   Node* flock = lock->fastlock_node();
 
   // Make the merge point
-  Node *region = new (C, 3) RegionNode(3);
+  Node *region;
+  Node *mem_phi;
+  Node *slow_path;
+
+  if (UseOptoBiasInlining) {
+    /*
+     *  See the full descrition in MacroAssembler::biased_locking_enter().
+     *
+     *  if( (mark_word & biased_lock_mask) == biased_lock_pattern ) {
+     *    // The object is biased.
+     *    proto_node = klass->prototype_header;
+     *    o_node = thread | proto_node;
+     *    x_node = o_node ^ mark_word;
+     *    if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ?
+     *      // Done.
+     *    } else {
+     *      if( (x_node & biased_lock_mask) != 0 ) {
+     *        // The klass's prototype header is no longer biased.
+     *        cas(&mark_word, mark_word, proto_node)
+     *        goto cas_lock;
+     *      } else {
+     *        // The klass's prototype header is still biased.
+     *        if( (x_node & epoch_mask) != 0 ) { // Expired epoch?
+     *          old = mark_word;
+     *          new = o_node;
+     *        } else {
+     *          // Different thread or anonymous biased.
+     *          old = mark_word & (epoch_mask | age_mask | biased_lock_mask);
+     *          new = thread | old;
+     *        }
+     *        // Try to rebias.
+     *        if( cas(&mark_word, old, new) == 0 ) {
+     *          // Done.
+     *        } else {
+     *          goto slow_path; // Failed.
+     *        }
+     *      }
+     *    }
+     *  } else {
+     *    // The object is not biased.
+     *    cas_lock:
+     *    if( FastLock(obj) == 0 ) {
+     *      // Done.
+     *    } else {
+     *      slow_path:
+     *      OptoRuntime::complete_monitor_locking_Java(obj);
+     *    }
+     *  }
+     */
+
+    region  = new (C, 5) RegionNode(5);
+    // create a Phi for the memory state
+    mem_phi = new (C, 5) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    Node* fast_lock_region  = new (C, 3) RegionNode(3);
+    Node* fast_lock_mem_phi = new (C, 3) PhiNode( fast_lock_region, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    // First, check mark word for the biased lock pattern.
+    Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
+
+    // Get fast path - mark word has the biased lock pattern.
+    ctrl = opt_bits_test(ctrl, fast_lock_region, 1, mark_node,
+                         markOopDesc::biased_lock_mask_in_place,
+                         markOopDesc::biased_lock_pattern, true);
+    // fast_lock_region->in(1) is set to slow path.
+    fast_lock_mem_phi->init_req(1, mem);
+
+    // Now check that the lock is biased to the current thread and has
+    // the same epoch and bias as Klass::_prototype_header.
+
+    // Special-case a fresh allocation to avoid building nodes:
+    Node* klass_node = AllocateNode::Ideal_klass(obj, &_igvn);
+    if (klass_node == NULL) {
+      Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
+      klass_node = transform_later( LoadKlassNode::make(_igvn, mem, k_adr, _igvn.type(k_adr)->is_ptr()) );
+      klass_node->init_req(0, ctrl);
+    }
+    Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type());
+
+    Node* thread = transform_later(new (C, 1) ThreadLocalNode());
+    Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
+    Node* o_node = transform_later(new (C, 3) OrXNode(cast_thread, proto_node));
+    Node* x_node = transform_later(new (C, 3) XorXNode(o_node, mark_node));
+
+    // Get slow path - mark word does NOT match the value.
+    Node* not_biased_ctrl =  opt_bits_test(ctrl, region, 3, x_node,
+                                      (~markOopDesc::age_mask_in_place), 0);
+    // region->in(3) is set to fast path - the object is biased to the current thread.
+    mem_phi->init_req(3, mem);
+
+
+    // Mark word does NOT match the value (thread | Klass::_prototype_header).
+
 
-  Node *bol = transform_later(new (C, 2) BoolNode(flock,BoolTest::ne));
-  Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
-  // Optimize test; set region slot 2
-  Node *slow_path = opt_iff(region,iff);
+    // First, check biased pattern.
+    // Get fast path - _prototype_header has the same biased lock pattern.
+    ctrl =  opt_bits_test(not_biased_ctrl, fast_lock_region, 2, x_node,
+                          markOopDesc::biased_lock_mask_in_place, 0, true);
+
+    not_biased_ctrl = fast_lock_region->in(2); // Slow path
+    // fast_lock_region->in(2) - the prototype header is no longer biased
+    // and we have to revoke the bias on this object.
+    // We are going to try to reset the mark of this object to the prototype
+    // value and fall through to the CAS-based locking scheme.
+    Node* adr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
+    Node* cas = new (C, 5) StoreXConditionalNode(not_biased_ctrl, mem, adr,
+                                                 proto_node, mark_node);
+    transform_later(cas);
+    Node* proj = transform_later( new (C, 1) SCMemProjNode(cas));
+    fast_lock_mem_phi->init_req(2, proj);
+
+
+    // Second, check epoch bits.
+    Node* rebiased_region  = new (C, 3) RegionNode(3);
+    Node* old_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X);
+    Node* new_phi = new (C, 3) PhiNode( rebiased_region, TypeX_X);
+
+    // Get slow path - mark word does NOT match epoch bits.
+    Node* epoch_ctrl =  opt_bits_test(ctrl, rebiased_region, 1, x_node,
+                                      markOopDesc::epoch_mask_in_place, 0);
+    // The epoch of the current bias is not valid, attempt to rebias the object
+    // toward the current thread.
+    rebiased_region->init_req(2, epoch_ctrl);
+    old_phi->init_req(2, mark_node);
+    new_phi->init_req(2, o_node);
+
+    // rebiased_region->in(1) is set to fast path.
+    // The epoch of the current bias is still valid but we know
+    // nothing about the owner; it might be set or it might be clear.
+    Node* cmask   = MakeConX(markOopDesc::biased_lock_mask_in_place |
+                             markOopDesc::age_mask_in_place |
+                             markOopDesc::epoch_mask_in_place);
+    Node* old = transform_later(new (C, 3) AndXNode(mark_node, cmask));
+    cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
+    Node* new_mark = transform_later(new (C, 3) OrXNode(cast_thread, old));
+    old_phi->init_req(1, old);
+    new_phi->init_req(1, new_mark);
+
+    transform_later(rebiased_region);
+    transform_later(old_phi);
+    transform_later(new_phi);
+
+    // Try to acquire the bias of the object using an atomic operation.
+    // If this fails we will go in to the runtime to revoke the object's bias.
+    cas = new (C, 5) StoreXConditionalNode(rebiased_region, mem, adr,
+                                           new_phi, old_phi);
+    transform_later(cas);
+    proj = transform_later( new (C, 1) SCMemProjNode(cas));
+
+    // Get slow path - Failed to CAS.
+    not_biased_ctrl = opt_bits_test(rebiased_region, region, 4, cas, 0, 0);
+    mem_phi->init_req(4, proj);
+    // region->in(4) is set to fast path - the object is rebiased to the current thread.
+
+    // Failed to CAS.
+    slow_path  = new (C, 3) RegionNode(3);
+    Node *slow_mem = new (C, 3) PhiNode( slow_path, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    slow_path->init_req(1, not_biased_ctrl); // Capture slow-control
+    slow_mem->init_req(1, proj);
+
+    // Call CAS-based locking scheme (FastLock node).
+
+    transform_later(fast_lock_region);
+    transform_later(fast_lock_mem_phi);
+
+    // Get slow path - FastLock failed to lock the object.
+    ctrl = opt_bits_test(fast_lock_region, region, 2, flock, 0, 0);
+    mem_phi->init_req(2, fast_lock_mem_phi);
+    // region->in(2) is set to fast path - the object is locked to the current thread.
+
+    slow_path->init_req(2, ctrl); // Capture slow-control
+    slow_mem->init_req(2, fast_lock_mem_phi);
+
+    transform_later(slow_path);
+    transform_later(slow_mem);
+    // Reset lock's memory edge.
+    lock->set_req(TypeFunc::Memory, slow_mem);
+
+  } else {
+    region  = new (C, 3) RegionNode(3);
+    // create a Phi for the memory state
+    mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+
+    // Optimize test; set region slot 2
+    slow_path = opt_bits_test(ctrl, region, 2, flock, 0, 0);
+    mem_phi->init_req(2, mem);
+  }
 
   // Make slow path call
   CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box );
@@ -1614,16 +1811,11 @@
   transform_later(region);
   _igvn.subsume_node(_fallthroughproj, region);
 
-  // create a Phi for the memory state
-  Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
-  Node *memproj = transform_later( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+  Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
   mem_phi->init_req(1, memproj );
-  mem_phi->init_req(2, mem);
   transform_later(mem_phi);
-    _igvn.hash_delete(_memproj_fallthrough);
+  _igvn.hash_delete(_memproj_fallthrough);
   _igvn.subsume_node(_memproj_fallthrough, mem_phi);
-
-
 }
 
 //------------------------------expand_unlock_node----------------------
@@ -1637,14 +1829,31 @@
   // No need for a null check on unlock
 
   // Make the merge point
-  RegionNode *region = new (C, 3) RegionNode(3);
+  Node *region;
+  Node *mem_phi;
+
+  if (UseOptoBiasInlining) {
+    // Check for biased locking unlock case, which is a no-op.
+    // See the full descrition in MacroAssembler::biased_locking_exit().
+    region  = new (C, 4) RegionNode(4);
+    // create a Phi for the memory state
+    mem_phi = new (C, 4) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+    mem_phi->init_req(3, mem);
+
+    Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
+    ctrl = opt_bits_test(ctrl, region, 3, mark_node,
+                         markOopDesc::biased_lock_mask_in_place,
+                         markOopDesc::biased_lock_pattern);
+  } else {
+    region  = new (C, 3) RegionNode(3);
+    // create a Phi for the memory state
+    mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+  }
 
   FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box );
   funlock = transform_later( funlock )->as_FastUnlock();
-  Node *bol = transform_later(new (C, 2) BoolNode(funlock,BoolTest::ne));
-  Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
   // Optimize test; set region slot 2
-  Node *slow_path = opt_iff(region,iff);
+  Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0);
 
   CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box );
 
@@ -1666,16 +1875,12 @@
   transform_later(region);
   _igvn.subsume_node(_fallthroughproj, region);
 
-  // create a Phi for the memory state
-  Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
   Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
   mem_phi->init_req(1, memproj );
   mem_phi->init_req(2, mem);
   transform_later(mem_phi);
-    _igvn.hash_delete(_memproj_fallthrough);
+  _igvn.hash_delete(_memproj_fallthrough);
   _igvn.subsume_node(_memproj_fallthrough, mem_phi);
-
-
 }
 
 //------------------------------expand_macro_nodes----------------------
--- a/src/share/vm/opto/macro.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/macro.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -93,7 +93,7 @@
 
   int replace_input(Node *use, Node *oldref, Node *newref);
   void copy_call_debug_info(CallNode *oldcall, CallNode * newcall);
-  Node* opt_iff(Node* region, Node* iff);
+  Node* opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path = false);
   void copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call);
   CallNode* make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call,
                        const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1);
--- a/src/share/vm/opto/matcher.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/matcher.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -1951,6 +1951,7 @@
       // Now hack a few special opcodes
       switch( n->Opcode() ) {       // Handle some opcodes special
       case Op_StorePConditional:
+      case Op_StoreIConditional:
       case Op_StoreLConditional:
       case Op_CompareAndSwapI:
       case Op_CompareAndSwapL:
--- a/src/share/vm/opto/memnode.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/memnode.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -227,6 +227,14 @@
   const Type *t_adr = phase->type( address );
   if( t_adr == Type::TOP )              return NodeSentinel; // caller will return NULL
 
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  if( can_reshape && igvn != NULL && igvn->_worklist.member(address) ) {
+    // The address's base and type may change when the address is processed.
+    // Delay this mem node transformation until the address is processed.
+    phase->is_IterGVN()->_worklist.push(this);
+    return NodeSentinel; // caller will return NULL
+  }
+
   // Avoid independent memory operations
   Node* old_mem = mem;
 
--- a/src/share/vm/opto/memnode.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/memnode.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -632,6 +632,17 @@
   virtual uint ideal_reg() const { return Op_RegFlags; }
 };
 
+//------------------------------StoreIConditionalNode---------------------------
+// Conditionally store int to memory, if no change since prior
+// load-locked.  Sets flags for success or failure of the store.
+class StoreIConditionalNode : public LoadStoreNode {
+public:
+  StoreIConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ii ) : LoadStoreNode(c, mem, adr, val, ii) { }
+  virtual int Opcode() const;
+  // Produces flags
+  virtual uint ideal_reg() const { return Op_RegFlags; }
+};
+
 //------------------------------StoreLConditionalNode---------------------------
 // Conditionally store long to memory, if no change since prior
 // load-locked.  Sets flags for success or failure of the store.
@@ -639,6 +650,8 @@
 public:
   StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { }
   virtual int Opcode() const;
+  // Produces flags
+  virtual uint ideal_reg() const { return Op_RegFlags; }
 };
 
 
--- a/src/share/vm/opto/type.hpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/opto/type.hpp	Fri Nov 07 09:29:38 2008 -0800
@@ -1183,6 +1183,9 @@
 #define RShiftXNode  RShiftLNode
 // For card marks and hashcodes
 #define URShiftXNode URShiftLNode
+// UseOptoBiasInlining
+#define XorXNode     XorLNode
+#define StoreXConditionalNode StoreLConditionalNode
 // Opcodes
 #define Op_LShiftX   Op_LShiftL
 #define Op_AndX      Op_AndL
@@ -1222,6 +1225,9 @@
 #define RShiftXNode  RShiftINode
 // For card marks and hashcodes
 #define URShiftXNode URShiftINode
+// UseOptoBiasInlining
+#define XorXNode     XorINode
+#define StoreXConditionalNode StoreIConditionalNode
 // Opcodes
 #define Op_LShiftX   Op_LShiftI
 #define Op_AndX      Op_AndI
--- a/src/share/vm/runtime/arguments.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -2628,6 +2628,12 @@
   FLAG_SET_DEFAULT(UseBiasedLocking, false);
 #endif /* CC_INTERP */
 
+#ifdef COMPILER2
+  if (!UseBiasedLocking || EmitSync != 0) {
+    UseOptoBiasInlining = false;
+  }
+#endif
+
   if (PrintCommandLineFlags) {
     CommandLineFlags::printSetFlags();
   }
--- a/src/share/vm/utilities/vmError.cpp	Thu Nov 06 20:00:03 2008 -0800
+++ b/src/share/vm/utilities/vmError.cpp	Fri Nov 07 09:29:38 2008 -0800
@@ -263,7 +263,7 @@
          st->print("# java.lang.OutOfMemoryError: ");
          if (_size) {
            st->print("requested ");
-           sprintf(buf,"%d",_size);
+           sprintf(buf,SIZE_FORMAT,_size);
            st->print(buf);
            st->print(" bytes");
            if (_message != NULL) {