changeset 3595:a93a6d2c9e6c

Merge
author jiangli
date Tue, 24 Jul 2012 13:16:26 -0400
parents aba91a731143 (diff) 611e8a669a2c (current diff)
children bcd1b9d98558
files
diffstat 35 files changed, 1529 insertions(+), 1220 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Mon Jul 16 15:31:18 2012 -0400
+++ b/.hgtags	Tue Jul 24 13:16:26 2012 -0400
@@ -258,3 +258,8 @@
 831e5c76a20af18f3c08c5a95ed31be0e128a010 jdk8-b44
 9d5f20961bc5846fa8d098d534effafbbdae0a58 jdk8-b45
 40e5a3f2907ed02b335c7caa8ecf068cc801380d hs24-b15
+cf37a594c38db2ea926954154636f9f81da2e032 jdk8-b46
+0c7bb1f4f9c8062b5c5bfa56b3bdca44839b4109 jdk8-b47
+66b0450071c1534e014b131892cc86b63f1d009c hs24-b16
+1e26f61bbb521642639f56fae11326f1932f5a7d jdk8-b48
+bd54fe36b5e50f9ef1e30a5047b27fee5297e268 hs24-b17
--- a/make/hotspot_version	Mon Jul 16 15:31:18 2012 -0400
+++ b/make/hotspot_version	Tue Jul 24 13:16:26 2012 -0400
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=16
+HS_BUILD_NUMBER=18
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -2573,6 +2573,13 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x6C);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
@@ -3178,6 +3185,13 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx2() || (!vector256) && VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
+  emit_byte(0xEF);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
   bool vector256 = true;
@@ -3189,6 +3203,17 @@
   emit_byte(0x01);
 }
 
+void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  emit_byte(0x38);
+  emit_byte(0xC0 | encode);
+  // 0x00 - insert into lower 128 bits
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
+
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
@@ -7480,6 +7505,24 @@
   movb(as_Address(dst), src);
 }
 
+void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movdl(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movdl(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    movq(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    movq(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     if (UseXmmLoadAndClearUpper) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -1466,6 +1466,9 @@
   void punpckldq(XMMRegister dst, XMMRegister src);
   void punpckldq(XMMRegister dst, Address src);
 
+  // Interleave Low Quadwords
+  void punpcklqdq(XMMRegister dst, XMMRegister src);
+
 #ifndef _LP64 // no 32bit push/pop on amd64
   void pushl(Address src);
 #endif
@@ -1606,13 +1609,11 @@
 
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 
-  // AVX 3-operands instructions (encoded with VEX prefix)
+  // AVX 3-operands scalar instructions (encoded with VEX prefix)
   void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vaddss(XMMRegister dst, XMMRegister nds, Address src);
   void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vdivss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1625,13 +1626,17 @@
   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
 
   // AVX Vector instrucitons.
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
   // to avoid transaction penalty between AVX and SSE states. There is no
@@ -2563,6 +2568,20 @@
   void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
   void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
+
+  // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+    if (UseAVX > 1) // vinserti128h is available only in AVX2
+      Assembler::vinserti128h(dst, nds, src);
+    else
+      Assembler::vinsertf128h(dst, nds, src);
+  }
 
   // Data
 
@@ -2615,6 +2634,13 @@
   // to avoid hiding movb
   void movbyte(ArrayAddress dst, int src);
 
+  // Import other mov() methods from the parent class or else
+  // they will be hidden by the following overriding declaration.
+  using Assembler::movdl;
+  using Assembler::movq;
+  void movdl(XMMRegister dst, AddressLiteral src);
+  void movq(XMMRegister dst, AddressLiteral src);
+
   // Can push value or effective address
   void pushptr(AddressLiteral src);
 
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -562,7 +562,7 @@
         AllocatePrefetchInstr = 3;
       }
       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
-      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+      if( supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
         UseXMMForArrayCopy = true;
       }
       if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
--- a/src/cpu/x86/vm/x86.ad	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/cpu/x86/vm/x86.ad	Tue Jul 24 13:16:26 2012 -0400
@@ -71,244 +71,244 @@
 //              XMM0-XMM3 might hold parameters
 
 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
-reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
-reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
-reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
-reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
-reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
+reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
+reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
+reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
+reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
+reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
+reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
 
 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
-reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
-reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
-reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
-reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
-reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
+reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
+reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
+reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
+reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
+reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
+reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 
 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
-reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
-reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
-reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
-reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
-reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
+reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
+reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
+reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
+reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
+reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
+reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 
 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
-reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
-reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
-reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
-reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
-reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
+reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
+reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
+reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
+reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
+reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
+reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 
 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
-reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
-reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
-reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
-reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
-reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
+reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
+reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
+reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
+reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
+reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
+reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 
 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
-reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
-reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
-reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
-reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
-reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
+reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
+reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
+reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
+reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
+reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
+reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 
 #ifdef _WIN64
 
 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
-reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
-reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
-reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
-reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
+reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
+reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
+reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
+reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
+reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
+reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 
 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
-reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
-reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
-reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
-reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
+reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
+reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
+reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
+reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
+reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
+reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 
 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
-reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
-reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
-reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
-reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
-reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
+reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
+reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
+reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
+reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
+reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
+reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 
 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
-reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
-reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
-reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
-reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
-reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
+reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
+reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
+reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
+reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
+reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
+reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 
 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
-reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
-reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
-reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
-reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
+reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
+reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
+reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
+reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
+reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
+reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 
 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
-reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
-reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
-reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
-reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
+reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
+reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
+reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
+reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
+reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
+reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 
 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
-reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
-reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
-reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
-reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
+reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
+reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
+reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
+reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
+reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
+reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 
 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
-reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
-reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
-reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
-reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
+reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
+reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
+reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
+reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
+reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
+reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 
 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
-reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
-reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
-reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
-reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
+reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
+reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
+reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
+reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
+reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
+reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 
 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
-reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
-reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
-reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
-reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
+reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
+reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
+reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
+reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
+reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
+reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 
 #else // _WIN64
 
 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
-reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
-reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
-reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
-reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
+reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
+reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
+reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
+reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
+reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
+reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 
 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
-reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
-reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
-reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
-reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
+reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
+reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
+reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
+reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
+reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
+reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 
 #ifdef _LP64
 
 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
-reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
-reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
-reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
-reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
-reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
+reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
+reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
+reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
+reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
+reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
+reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 
 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
-reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
-reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
-reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
-reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
-reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
+reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
+reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
+reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
+reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
+reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
+reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 
 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
-reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
-reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
-reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
-reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
+reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
+reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
+reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
+reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
+reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
+reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 
 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
-reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
-reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
-reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
-reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
+reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
+reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
+reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
+reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
+reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
+reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 
 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
-reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
-reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
-reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
-reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
+reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
+reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
+reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
+reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
+reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
+reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 
 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
-reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
-reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
-reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
-reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
+reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
+reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
+reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
+reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
+reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
+reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 
 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
-reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
-reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
-reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
-reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
+reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
+reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
+reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
+reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
+reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
+reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 
 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
-reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
-reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
-reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
-reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
+reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
+reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
+reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
+reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
+reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
+reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 
 #endif // _LP64
 
@@ -889,7 +889,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_reg(regF dst, regF src1, regF src2) %{
+instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src1 src2));
 
@@ -901,7 +901,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_mem(regF dst, regF src1, memory src2) %{
+instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src1 (LoadF src2)));
 
@@ -913,7 +913,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_imm(regF dst, regF src, immF con) %{
+instruct addF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src con));
 
@@ -960,7 +960,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_reg(regD dst, regD src1, regD src2) %{
+instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src1 src2));
 
@@ -972,7 +972,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_mem(regD dst, regD src1, memory src2) %{
+instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src1 (LoadD src2)));
 
@@ -984,7 +984,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_imm(regD dst, regD src, immD con) %{
+instruct addD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src con));
 
@@ -1031,7 +1031,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_reg(regF dst, regF src1, regF src2) %{
+instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src1 src2));
 
@@ -1043,7 +1043,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_mem(regF dst, regF src1, memory src2) %{
+instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src1 (LoadF src2)));
 
@@ -1055,7 +1055,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_imm(regF dst, regF src, immF con) %{
+instruct subF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src con));
 
@@ -1102,7 +1102,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_reg(regD dst, regD src1, regD src2) %{
+instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src1 src2));
 
@@ -1114,7 +1114,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_mem(regD dst, regD src1, memory src2) %{
+instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src1 (LoadD src2)));
 
@@ -1126,7 +1126,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_imm(regD dst, regD src, immD con) %{
+instruct subD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src con));
 
@@ -1173,7 +1173,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_reg(regF dst, regF src1, regF src2) %{
+instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src1 src2));
 
@@ -1185,7 +1185,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_mem(regF dst, regF src1, memory src2) %{
+instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src1 (LoadF src2)));
 
@@ -1197,7 +1197,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_imm(regF dst, regF src, immF con) %{
+instruct mulF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src con));
 
@@ -1244,7 +1244,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_reg(regD dst, regD src1, regD src2) %{
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src1 src2));
 
@@ -1256,7 +1256,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_mem(regD dst, regD src1, memory src2) %{
+instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src1 (LoadD src2)));
 
@@ -1268,7 +1268,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_imm(regD dst, regD src, immD con) %{
+instruct mulD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src con));
 
@@ -1315,7 +1315,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_reg(regF dst, regF src1, regF src2) %{
+instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src1 src2));
 
@@ -1327,7 +1327,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_mem(regF dst, regF src1, memory src2) %{
+instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src1 (LoadF src2)));
 
@@ -1339,7 +1339,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_imm(regF dst, regF src, immF con) %{
+instruct divF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src con));
 
@@ -1386,7 +1386,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_reg(regD dst, regD src1, regD src2) %{
+instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src1 src2));
 
@@ -1398,7 +1398,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_mem(regD dst, regD src1, memory src2) %{
+instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src1 (LoadD src2)));
 
@@ -1410,7 +1410,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_imm(regD dst, regD src, immD con) %{
+instruct divD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src con));
 
@@ -1433,7 +1433,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsF_reg(regF dst, regF src) %{
+instruct absF_reg_reg(regF dst, regF src) %{
   predicate(UseAVX > 0);
   match(Set dst (AbsF src));
   ins_cost(150);
@@ -1457,7 +1457,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsD_reg(regD dst, regD src) %{
+instruct absD_reg_reg(regD dst, regD src) %{
   predicate(UseAVX > 0);
   match(Set dst (AbsD src));
   ins_cost(150);
@@ -1481,7 +1481,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegF_reg(regF dst, regF src) %{
+instruct negF_reg_reg(regF dst, regF src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegF src));
   ins_cost(150);
@@ -1505,7 +1505,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegD_reg(regD dst, regD src) %{
+instruct negD_reg_reg(regD dst, regD src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegD src));
   ins_cost(150);
@@ -1719,12 +1719,12 @@
   format %{ "movd    $dst,$src\n\t"
             "punpcklbw $dst,$dst\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\t! replicate16B" %}
+            "punpcklqdq $dst,$dst\t! replicate16B" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1735,14 +1735,14 @@
   format %{ "movd    $dst,$src\n\t"
             "punpcklbw $dst,$dst\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1751,9 +1751,9 @@
 instruct Repl4B_imm(vecS dst, immI con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateB con));
-  format %{ "movss   $dst,[$constantaddress]\t! replicate4B($con)" %}
+  format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
   ins_encode %{
-    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
+    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1761,9 +1761,9 @@
 instruct Repl8B_imm(vecD dst, immI con) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateB con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate8B($con)" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1771,11 +1771,11 @@
 instruct Repl16B_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 16);
   match(Set dst (ReplicateB con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate16B($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1783,13 +1783,13 @@
 instruct Repl32B_imm(vecY dst, immI con) %{
   predicate(n->as_Vector()->length() == 32);
   match(Set dst (ReplicateB con));
-  format %{ "movsd   $dst,[$constantaddress]\t! lreplicate32B($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1828,11 +1828,11 @@
 instruct Repl32B_zero(vecY dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 32);
   match(Set dst (ReplicateB zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate32B zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
   ins_encode %{
     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -1867,11 +1867,11 @@
   match(Set dst (ReplicateS src));
   format %{ "movd    $dst,$src\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\t! replicate8S" %}
+            "punpcklqdq $dst,$dst\t! replicate8S" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1881,13 +1881,13 @@
   match(Set dst (ReplicateS src));
   format %{ "movd    $dst,$src\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1896,9 +1896,9 @@
 instruct Repl2S_imm(vecS dst, immI con) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateS con));
-  format %{ "movss   $dst,[$constantaddress]\t! replicate2S($con)" %}
+  format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
   ins_encode %{
-    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
+    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -1906,9 +1906,9 @@
 instruct Repl4S_imm(vecD dst, immI con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateS con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate4S($con)" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -1916,11 +1916,11 @@
 instruct Repl8S_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateS con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate8S($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1928,13 +1928,13 @@
 instruct Repl16S_imm(vecY dst, immI con) %{
   predicate(n->as_Vector()->length() == 16);
   match(Set dst (ReplicateS con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate16S($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1973,11 +1973,11 @@
 instruct Repl16S_zero(vecY dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 16);
   match(Set dst (ReplicateS zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate16S zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
   ins_encode %{
     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -2012,11 +2012,11 @@
   match(Set dst (ReplicateI src));
   format %{ "movd    $dst,$src\n\t"
             "pshufd  $dst,$dst,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2025,9 +2025,9 @@
 instruct Repl2I_imm(vecD dst, immI con) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateI con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate2I($con)" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -2035,11 +2035,11 @@
 instruct Repl4I_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateI con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate4I($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
+            "punpcklqdq $dst,$dst" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2047,13 +2047,13 @@
 instruct Repl8I_imm(vecY dst, immI con) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateI con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate8I($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2061,7 +2061,7 @@
 // Integer could be loaded into xmm register directly from memory.
 instruct Repl2I_mem(vecD dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateI (LoadVector mem)));
+  match(Set dst (ReplicateI (LoadI mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
   ins_encode %{
@@ -2073,7 +2073,7 @@
 
 instruct Repl4I_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateI (LoadVector mem)));
+  match(Set dst (ReplicateI (LoadI mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
   ins_encode %{
@@ -2085,14 +2085,14 @@
 
 instruct Repl8I_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateI (LoadVector mem)));
+  match(Set dst (ReplicateI (LoadI mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $mem$$Address);
     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2121,11 +2121,11 @@
 instruct Repl8I_zero(vecY dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateI zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate8I zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
   ins_encode %{
     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -2136,10 +2136,10 @@
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateL src));
   format %{ "movdq   $dst,$src\n\t"
-            "movlhps $dst,$dst\t! replicate2L" %}
+            "punpcklqdq $dst,$dst\t! replicate2L" %}
   ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2148,12 +2148,12 @@
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateL src));
   format %{ "movdq   $dst,$src\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2165,12 +2165,12 @@
   format %{ "movdl   $dst,$src.lo\n\t"
             "movdl   $tmp,$src.hi\n\t"
             "punpckldq $dst,$tmp\n\t"
-            "movlhps $dst,$dst\t! replicate2L"%}
+            "punpcklqdq $dst,$dst\t! replicate2L"%}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2182,14 +2182,14 @@
   format %{ "movdl   $dst,$src.lo\n\t"
             "movdl   $tmp,$src.hi\n\t"
             "punpckldq $dst,$tmp\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2199,11 +2199,11 @@
 instruct Repl2L_imm(vecX dst, immL con) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateL con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate2L($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress($con));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2211,13 +2211,13 @@
 instruct Repl4L_imm(vecY dst, immL con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateL con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate4L($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress($con));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2225,26 +2225,26 @@
 // Long could be loaded into xmm register directly from memory.
 instruct Repl2L_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateL (LoadVector mem)));
+  match(Set dst (ReplicateL (LoadL mem)));
   format %{ "movq    $dst,$mem\n\t"
-            "movlhps $dst,$dst\t! replicate2L" %}
+            "punpcklqdq $dst,$dst\t! replicate2L" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $mem$$Address);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl4L_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL (LoadVector mem)));
+  match(Set dst (ReplicateL (LoadL mem)));
   format %{ "movq    $dst,$mem\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $mem$$Address);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2263,11 +2263,11 @@
 instruct Repl4L_zero(vecY dst, immL0 zero) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateL zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4L zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
   ins_encode %{
     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -3505,8 +3505,10 @@
   }
 
   // now perform tests that are based on flag settings
-  if (callee->should_inline()) {
+  if (callee->force_inline() || callee->should_inline()) {
     // ignore heuristic controls on inlining
+    if (callee->force_inline())
+      CompileTask::print_inlining(callee, scope()->level(), bci(), "force inline by annotation");
   } else {
     if (inline_level() > MaxInlineLevel                         ) INLINE_BAILOUT("too-deep inlining");
     if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("too-deep recursive inlining");
@@ -3531,7 +3533,7 @@
   }
 
 #ifndef PRODUCT
-      // printing
+  // printing
   if (PrintInlining) {
     print_inline_result(callee, true);
   }
--- a/src/share/vm/ci/ciMethod.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/ci/ciMethod.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -160,6 +160,8 @@
   // Code size for inlining decisions.
   int code_size_for_inlining();
 
+  bool force_inline() { return get_methodOop()->force_inline(); }
+
   int comp_level();
   int highest_osr_comp_level();
 
--- a/src/share/vm/classfile/classFileParser.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/classfile/classFileParser.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -318,6 +318,13 @@
 
 bool inline valid_cp_range(int index, int length) { return (index > 0 && index < length); }
 
+inline Symbol* check_symbol_at(constantPoolHandle cp, int index) {
+  if (valid_cp_range(index, cp->length()) && cp->tag_at(index).is_utf8())
+    return cp->symbol_at(index);
+  else
+    return NULL;
+}
+
 constantPoolHandle ClassFileParser::parse_constant_pool(Handle class_loader, TRAPS) {
   ClassFileStream* cfs = stream();
   constantPoolHandle nullHandle;
@@ -902,6 +909,7 @@
                                              bool* is_synthetic_addr,
                                              u2* generic_signature_index_addr,
                                              typeArrayHandle* field_annotations,
+                                             ClassFileParser::FieldAnnotationCollector* parsed_annotations,
                                              TRAPS) {
   ClassFileStream* cfs = stream();
   assert(attributes_count > 0, "length should be greater than 0");
@@ -1142,12 +1150,14 @@
     bool is_synthetic = false;
     u2 generic_signature_index = 0;
     bool is_static = access_flags.is_static();
+    FieldAnnotationCollector parsed_annotations;
 
     u2 attributes_count = cfs->get_u2_fast();
     if (attributes_count > 0) {
       parse_field_attributes(cp, attributes_count, is_static, signature_index,
                              &constantvalue_index, &is_synthetic,
                              &generic_signature_index, &field_annotations,
+                             &parsed_annotations,
                              CHECK_(nullHandle));
       if (field_annotations.not_null()) {
         if (fields_annotations->is_null()) {
@@ -1173,6 +1183,8 @@
                       signature_index,
                       constantvalue_index,
                       0);
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(field);
 
     BasicType type = cp->basic_type_for_signature_at(signature_index);
 
@@ -1634,12 +1646,158 @@
       name->as_C_string(), _class_name->as_C_string(), sig->as_C_string());
 }
 
+// Skip an annotation.  Return >=limit if there is any problem.
+int ClassFileParser::skip_annotation(u1* buffer, int limit, int index) {
+  // annotation := atype:u2 do(nmem:u2) {member:u2 value}
+  // value := switch (tag:u1) { ... }
+  index += 2;  // skip atype
+  if ((index += 2) >= limit)  return limit;  // read nmem
+  int nmem = Bytes::get_Java_u2(buffer+index-2);
+  while (--nmem >= 0 && index < limit) {
+    index += 2; // skip member
+    index = skip_annotation_value(buffer, limit, index);
+  }
+  return index;
+}
+
+// Skip an annotation value.  Return >=limit if there is any problem.
+int ClassFileParser::skip_annotation_value(u1* buffer, int limit, int index) {
+  // value := switch (tag:u1) {
+  //   case B, C, I, S, Z, D, F, J, c: con:u2;
+  //   case e: e_class:u2 e_name:u2;
+  //   case s: s_con:u2;
+  //   case [: do(nval:u2) {value};
+  //   case @: annotation;
+  //   case s: s_con:u2;
+  // }
+  if ((index += 1) >= limit)  return limit;  // read tag
+  u1 tag = buffer[index-1];
+  switch (tag) {
+  case 'B': case 'C': case 'I': case 'S': case 'Z':
+  case 'D': case 'F': case 'J': case 'c': case 's':
+    index += 2;  // skip con or s_con
+    break;
+  case 'e':
+    index += 4;  // skip e_class, e_name
+    break;
+  case '[':
+    {
+      if ((index += 2) >= limit)  return limit;  // read nval
+      int nval = Bytes::get_Java_u2(buffer+index-2);
+      while (--nval >= 0 && index < limit) {
+        index = skip_annotation_value(buffer, limit, index);
+      }
+    }
+    break;
+  case '@':
+    index = skip_annotation(buffer, limit, index);
+    break;
+  default:
+    assert(false, "annotation tag");
+    return limit;  //  bad tag byte
+  }
+  return index;
+}
+
+// Sift through annotations, looking for those significant to the VM:
+void ClassFileParser::parse_annotations(u1* buffer, int limit,
+                                        constantPoolHandle cp,
+                                        ClassFileParser::AnnotationCollector* coll,
+                                        TRAPS) {
+  // annotations := do(nann:u2) {annotation}
+  int index = 0;
+  if ((index += 2) >= limit)  return;  // read nann
+  int nann = Bytes::get_Java_u2(buffer+index-2);
+  enum {  // initial annotation layout
+    atype_off = 0,      // utf8 such as 'Ljava/lang/annotation/Retention;'
+    count_off = 2,      // u2   such as 1 (one value)
+    member_off = 4,     // utf8 such as 'value'
+    tag_off = 6,        // u1   such as 'c' (type) or 'e' (enum)
+    e_tag_val = 'e',
+      e_type_off = 7,   // utf8 such as 'Ljava/lang/annotation/RetentionPolicy;'
+      e_con_off = 9,    // utf8 payload, such as 'SOURCE', 'CLASS', 'RUNTIME'
+      e_size = 11,     // end of 'e' annotation
+    c_tag_val = 'c',
+      c_con_off = 7,    // utf8 payload, such as 'I' or 'Ljava/lang/String;'
+      c_size = 9,       // end of 'c' annotation
+    min_size = 6        // smallest possible size (zero members)
+  };
+  while ((--nann) >= 0 && (index-2 + min_size <= limit)) {
+    int index0 = index;
+    index = skip_annotation(buffer, limit, index);
+    u1* abase = buffer + index0;
+    int atype = Bytes::get_Java_u2(abase + atype_off);
+    int count = Bytes::get_Java_u2(abase + count_off);
+    Symbol* aname = check_symbol_at(cp, atype);
+    if (aname == NULL)  break;  // invalid annotation name
+    Symbol* member = NULL;
+    if (count >= 1) {
+      int member_index = Bytes::get_Java_u2(abase + member_off);
+      member = check_symbol_at(cp, member_index);
+      if (member == NULL)  break;  // invalid member name
+    }
+
+    // Here is where parsing particular annotations will take place.
+    AnnotationCollector::ID id = coll->annotation_index(aname);
+    if (id == AnnotationCollector::_unknown)  continue;
+    coll->set_annotation(id);
+    // If there are no values, just set the bit and move on:
+    if (count == 0)   continue;
+
+    // For the record, here is how annotation payloads can be collected.
+    // Suppose we want to capture @Retention.value.  Here is how:
+    //if (id == AnnotationCollector::_class_Retention) {
+    //  Symbol* payload = NULL;
+    //  if (count == 1
+    //      && e_size == (index0 - index)  // match size
+    //      && e_tag_val == *(abase + tag_off)
+    //      && (check_symbol_at(cp, Bytes::get_Java_u2(abase + e_type_off))
+    //          == vmSymbols::RetentionPolicy_signature())
+    //      && member == vmSymbols::value_name()) {
+    //    payload = check_symbol_at(cp, Bytes::get_Java_u2(abase + e_con_off));
+    //  }
+    //  check_property(payload != NULL,
+    //                 "Invalid @Retention annotation at offset %u in class file %s",
+    //                 index0, CHECK);
+    //  if (payload != NULL) {
+    //      payload->increment_refcount();
+    //      coll->_class_RetentionPolicy = payload;
+    //  }
+    //}
+  }
+}
+
+ClassFileParser::AnnotationCollector::ID ClassFileParser::AnnotationCollector::annotation_index(Symbol* name) {
+  vmSymbols::SID sid = vmSymbols::find_sid(name);
+  switch (sid) {
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_ForceInline_signature):
+    if (_location != _in_method)  break;  // only allow for methods
+    return _method_ForceInline;
+  default: break;
+  }
+  return AnnotationCollector::_unknown;
+}
+
+void ClassFileParser::FieldAnnotationCollector::apply_to(FieldInfo* f) {
+  fatal("no field annotations yet");
+}
+
+void ClassFileParser::MethodAnnotationCollector::apply_to(methodHandle m) {
+  if (has_annotation(_method_ForceInline))
+    m->set_force_inline(true);
+}
+
+void ClassFileParser::ClassAnnotationCollector::apply_to(instanceKlassHandle k) {
+  fatal("no class annotations yet");
+}
+
+
 #define MAX_ARGS_SIZE 255
 #define MAX_CODE_SIZE 65535
 #define INITIAL_MAX_LVT_NUMBER 256
 
 // Note: the parse_method below is big and clunky because all parsing of the code and exceptions
-// attribute is inlined. This is curbersome to avoid since we inline most of the parts in the
+// attribute is inlined. This is cumbersome to avoid since we inline most of the parts in the
 // methodOop to save footprint, so we only know the size of the resulting methodOop when the
 // entire method attribute is parsed.
 //
@@ -1730,6 +1888,7 @@
   // stackmap attribute - JDK1.5
   typeArrayHandle stackmap_data;
   u2 generic_signature_index = 0;
+  MethodAnnotationCollector parsed_annotations;
   u1* runtime_visible_annotations = NULL;
   int runtime_visible_annotations_length = 0;
   u1* runtime_invisible_annotations = NULL;
@@ -1956,6 +2115,7 @@
         runtime_visible_annotations_length = method_attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
+        parse_annotations(runtime_visible_annotations, runtime_visible_annotations_length, cp, &parsed_annotations, CHECK_(nullHandle));
         cfs->skip_u1(runtime_visible_annotations_length, CHECK_(nullHandle));
       } else if (PreserveAllAnnotations && method_attribute_name == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = method_attribute_length;
@@ -2144,6 +2304,8 @@
     clear_hashtable(lvt_Hash);
   }
 
+  if (parsed_annotations.has_any_annotations())
+    parsed_annotations.apply_to(m);
   *method_annotations = assemble_annotations(runtime_visible_annotations,
                                              runtime_visible_annotations_length,
                                              runtime_invisible_annotations,
@@ -2322,7 +2484,7 @@
 }
 
 
-void ClassFileParser::parse_classfile_sourcefile_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_sourcefile_attribute(constantPoolHandle cp, TRAPS) {
   ClassFileStream* cfs = stream();
   cfs->guarantee_more(2, CHECK);  // sourcefile_index
   u2 sourcefile_index = cfs->get_u2_fast();
@@ -2331,13 +2493,12 @@
       cp->tag_at(sourcefile_index).is_utf8(),
     "Invalid SourceFile attribute at constant pool index %u in class file %s",
     sourcefile_index, CHECK);
-  k->set_source_file_name(cp->symbol_at(sourcefile_index));
+  set_class_sourcefile(cp->symbol_at(sourcefile_index));
 }
 
 
 
 void ClassFileParser::parse_classfile_source_debug_extension_attribute(constantPoolHandle cp,
-                                                                       instanceKlassHandle k,
                                                                        int length, TRAPS) {
   ClassFileStream* cfs = stream();
   u1* sde_buffer = cfs->get_u1_buffer();
@@ -2345,7 +2506,13 @@
 
   // Don't bother storing it if there is no way to retrieve it
   if (JvmtiExport::can_get_source_debug_extension()) {
-    k->set_source_debug_extension((char*)sde_buffer, length);
+    assert((length+1) > length, "Overflow checking");
+    u1* sde = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, u1, length+1);
+    for (int i = 0; i < length; i++) {
+      sde[i] = sde_buffer[i];
+    }
+    sde[length] = '\0';
+    set_class_sde_buffer((char*)sde, length);
   }
   // Got utf8 string, set stream position forward
   cfs->skip_u1(length, CHECK);
@@ -2361,7 +2528,7 @@
                                                             u2 enclosing_method_class_index,
                                                             u2 enclosing_method_method_index,
                                                             constantPoolHandle cp,
-                                                            instanceKlassHandle k, TRAPS) {
+                                                            TRAPS) {
   ClassFileStream* cfs = stream();
   u1* current_mark = cfs->current();
   u2 length = 0;
@@ -2452,7 +2619,7 @@
   assert(index == size, "wrong size");
 
   // Update instanceKlass with inner class info.
-  k->set_inner_classes(inner_classes());
+  set_class_inner_classes(inner_classes);
 
   // Restore buffer's current position.
   cfs->set_current(current_mark);
@@ -2460,11 +2627,11 @@
   return length;
 }
 
-void ClassFileParser::parse_classfile_synthetic_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
-  k->set_is_synthetic();
+void ClassFileParser::parse_classfile_synthetic_attribute(constantPoolHandle cp, TRAPS) {
+  set_class_synthetic_flag(true);
 }
 
-void ClassFileParser::parse_classfile_signature_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_signature_attribute(constantPoolHandle cp, TRAPS) {
   ClassFileStream* cfs = stream();
   u2 signature_index = cfs->get_u2(CHECK);
   check_property(
@@ -2472,10 +2639,10 @@
       cp->tag_at(signature_index).is_utf8(),
     "Invalid constant pool index %u in Signature attribute in class file %s",
     signature_index, CHECK);
-  k->set_generic_signature(cp->symbol_at(signature_index));
+  set_class_generic_signature(cp->symbol_at(signature_index));
 }
 
-void ClassFileParser::parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, instanceKlassHandle k,
+void ClassFileParser::parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp,
                                                                   u4 attribute_byte_length, TRAPS) {
   ClassFileStream* cfs = stream();
   u1* current_start = cfs->current();
@@ -2547,10 +2714,12 @@
 }
 
 
-void ClassFileParser::parse_classfile_attributes(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_attributes(constantPoolHandle cp,
+                                                 ClassFileParser::ClassAnnotationCollector* parsed_annotations,
+                                                 TRAPS) {
   ClassFileStream* cfs = stream();
   // Set inner classes attribute to default sentinel
-  k->set_inner_classes(Universe::the_empty_short_array());
+  set_class_inner_classes(typeArrayHandle(THREAD, Universe::the_empty_short_array()));
   cfs->guarantee_more(2, CHECK);  // attributes_count
   u2 attributes_count = cfs->get_u2_fast();
   bool parsed_sourcefile_attribute = false;
@@ -2586,10 +2755,10 @@
       } else {
         parsed_sourcefile_attribute = true;
       }
-      parse_classfile_sourcefile_attribute(cp, k, CHECK);
+      parse_classfile_sourcefile_attribute(cp, CHECK);
     } else if (tag == vmSymbols::tag_source_debug_extension()) {
       // Check for SourceDebugExtension tag
-      parse_classfile_source_debug_extension_attribute(cp, k, (int)attribute_length, CHECK);
+      parse_classfile_source_debug_extension_attribute(cp, (int)attribute_length, CHECK);
     } else if (tag == vmSymbols::tag_inner_classes()) {
       // Check for InnerClasses tag
       if (parsed_innerclasses_attribute) {
@@ -2608,7 +2777,7 @@
           "Invalid Synthetic classfile attribute length %u in class file %s",
           attribute_length, CHECK);
       }
-      parse_classfile_synthetic_attribute(cp, k, CHECK);
+      parse_classfile_synthetic_attribute(cp, CHECK);
     } else if (tag == vmSymbols::tag_deprecated()) {
       // Check for Deprecatd tag - 4276120
       if (attribute_length != 0) {
@@ -2623,11 +2792,16 @@
             "Wrong Signature attribute length %u in class file %s",
             attribute_length, CHECK);
         }
-        parse_classfile_signature_attribute(cp, k, CHECK);
+        parse_classfile_signature_attribute(cp, CHECK);
       } else if (tag == vmSymbols::tag_runtime_visible_annotations()) {
         runtime_visible_annotations_length = attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
+        parse_annotations(runtime_visible_annotations,
+                          runtime_visible_annotations_length,
+                          cp,
+                          parsed_annotations,
+                          CHECK);
         cfs->skip_u1(runtime_visible_annotations_length, CHECK);
       } else if (PreserveAllAnnotations && tag == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = attribute_length;
@@ -2661,7 +2835,7 @@
         if (parsed_bootstrap_methods_attribute)
           classfile_parse_error("Multiple BootstrapMethods attributes in class file %s", CHECK);
         parsed_bootstrap_methods_attribute = true;
-        parse_classfile_bootstrap_methods_attribute(cp, k, attribute_length, CHECK);
+        parse_classfile_bootstrap_methods_attribute(cp, attribute_length, CHECK);
       } else {
         // Unknown attribute
         cfs->skip_u1(attribute_length, CHECK);
@@ -2676,7 +2850,7 @@
                                                      runtime_invisible_annotations,
                                                      runtime_invisible_annotations_length,
                                                      CHECK);
-  k->set_class_annotations(annotations());
+  set_class_annotations(annotations);
 
   if (parsed_innerclasses_attribute || parsed_enclosingmethod_attribute) {
     u2 num_of_classes = parse_classfile_inner_classes_attribute(
@@ -2684,7 +2858,7 @@
                             parsed_innerclasses_attribute,
                             enclosing_method_class_index,
                             enclosing_method_method_index,
-                            cp, k, CHECK);
+                            cp, CHECK);
     if (parsed_innerclasses_attribute &&_need_verify && _major_version >= JAVA_1_5_VERSION) {
       guarantee_property(
         inner_classes_attribute_length == sizeof(num_of_classes) + 4 * sizeof(u2) * num_of_classes,
@@ -2698,6 +2872,23 @@
   }
 }
 
+void ClassFileParser::apply_parsed_class_attributes(instanceKlassHandle k) {
+  if (_synthetic_flag)
+    k->set_is_synthetic();
+  if (_sourcefile != NULL) {
+    _sourcefile->increment_refcount();
+    k->set_source_file_name(_sourcefile);
+  }
+  if (_generic_signature != NULL) {
+    _generic_signature->increment_refcount();
+    k->set_generic_signature(_generic_signature);
+  }
+  if (_sde_buffer != NULL) {
+    k->set_source_debug_extension(_sde_buffer, _sde_length);
+  }
+  k->set_inner_classes(_inner_classes());
+  k->set_class_annotations(_annotations());
+}
 
 typeArrayHandle ClassFileParser::assemble_annotations(u1* runtime_visible_annotations,
                                                       int runtime_visible_annotations_length,
@@ -2748,8 +2939,7 @@
                             jt->get_thread_stat()->perf_timers_addr(),
                             PerfClassTraceTime::PARSE_CLASS);
 
-  _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false;
-  _max_bootstrap_specifier_index = -1;
+  init_parsed_class_attributes();
 
   if (JvmtiExport::should_post_class_file_load_hook()) {
     // Get the cached class file bytes (if any) from the class that
@@ -2982,6 +3172,13 @@
     objArrayHandle methods_parameter_annotations(THREAD, methods_parameter_annotations_oop);
     objArrayHandle methods_default_annotations(THREAD, methods_default_annotations_oop);
 
+    // Additional attributes
+    ClassAnnotationCollector parsed_annotations;
+    parse_classfile_attributes(cp, &parsed_annotations, CHECK_(nullHandle));
+
+    // Make sure this is the end of class file stream
+    guarantee_property(cfs->at_eos(), "Extra bytes at the end of class file %s", CHECK_(nullHandle));
+
     // We check super class after class file is parsed and format is checked
     if (super_class_index > 0 && super_klass.is_null()) {
       Symbol*  sk  = cp->klass_name_at(super_class_index);
@@ -3470,11 +3667,10 @@
       this_klass->set_has_miranda_methods(); // then set a flag
     }
 
-    // Additional attributes
-    parse_classfile_attributes(cp, this_klass, CHECK_(nullHandle));
-
-    // Make sure this is the end of class file stream
-    guarantee_property(cfs->at_eos(), "Extra bytes at the end of class file %s", CHECK_(nullHandle));
+    // Fill in field values obtained by parse_classfile_attributes
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(this_klass);
+    apply_parsed_class_attributes(this_klass);
 
     // VerifyOops believes that once this has been set, the object is completely loaded.
     // Compute transitive closure of interfaces this class implements
@@ -3489,6 +3685,7 @@
     // Do final class setup
     fill_oop_maps(this_klass, nonstatic_oop_map_count, nonstatic_oop_offsets, nonstatic_oop_counts);
 
+    // Fill in has_finalizer, has_vanilla_constructor, and layout_helper
     set_precomputed_flags(this_klass);
 
     // reinitialize modifiers, using the InnerClasses attribute
--- a/src/share/vm/classfile/classFileParser.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/classfile/classFileParser.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -31,8 +31,8 @@
 #include "oops/typeArrayOop.hpp"
 #include "runtime/handles.inline.hpp"
 #include "utilities/accessFlags.hpp"
+#include "classfile/symbolTable.hpp"
 
-class TempNewSymbol;
 class FieldAllocationCount;
 
 
@@ -50,11 +50,80 @@
   KlassHandle _host_klass;
   GrowableArray<Handle>* _cp_patches; // overrides for CP entries
 
+  // precomputed flags
   bool _has_finalizer;
   bool _has_empty_finalizer;
   bool _has_vanilla_constructor;
+  int _max_bootstrap_specifier_index;  // detects BSS values
 
-  int _max_bootstrap_specifier_index;
+  // class attributes parsed before the instance klass is created:
+  bool       _synthetic_flag;
+  Symbol*    _sourcefile;
+  Symbol*    _generic_signature;
+  char*      _sde_buffer;
+  int        _sde_length;
+  typeArrayHandle _inner_classes;
+  typeArrayHandle _annotations;
+
+  void set_class_synthetic_flag(bool x)           { _synthetic_flag = x; }
+  void set_class_sourcefile(Symbol* x)            { _sourcefile = x; }
+  void set_class_generic_signature(Symbol* x)     { _generic_signature = x; }
+  void set_class_sde_buffer(char* x, int len)     { _sde_buffer = x; _sde_length = len; }
+  void set_class_inner_classes(typeArrayHandle x) { _inner_classes = x; }
+  void set_class_annotations(typeArrayHandle x)   { _annotations = x; }
+  void init_parsed_class_attributes() {
+    _synthetic_flag = false;
+    _sourcefile = NULL;
+    _generic_signature = NULL;
+    _sde_buffer = NULL;
+    _sde_length = 0;
+    // initialize the other flags too:
+    _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false;
+    _max_bootstrap_specifier_index = -1;
+  }
+  void apply_parsed_class_attributes(instanceKlassHandle k);  // update k
+
+  class AnnotationCollector {
+  public:
+    enum Location { _in_field, _in_method, _in_class };
+    enum ID {
+      _unknown = 0,
+      _method_ForceInline,
+      _annotation_LIMIT
+    };
+    const Location _location;
+    int _annotations_present;
+    AnnotationCollector(Location location)
+    : _location(location), _annotations_present(0)
+    {
+      assert((int)_annotation_LIMIT <= (int)sizeof(_annotations_present) * BitsPerByte, "");
+    }
+    // If this annotation name has an ID, report it (or _none).
+    ID annotation_index(Symbol* name);
+    // Set the annotation name:
+    void set_annotation(ID id) {
+      assert((int)id >= 0 && (int)id < (int)_annotation_LIMIT, "oob");
+      _annotations_present |= nth_bit((int)id);
+    }
+    // Report if the annotation is present.
+    bool has_any_annotations() { return _annotations_present != 0; }
+    bool has_annotation(ID id) { return (nth_bit((int)id) & _annotations_present) != 0; }
+  };
+  class FieldAnnotationCollector: public AnnotationCollector {
+  public:
+    FieldAnnotationCollector() : AnnotationCollector(_in_field) { }
+    void apply_to(FieldInfo* f);
+  };
+  class MethodAnnotationCollector: public AnnotationCollector {
+  public:
+    MethodAnnotationCollector() : AnnotationCollector(_in_method) { }
+    void apply_to(methodHandle m);
+  };
+  class ClassAnnotationCollector: public AnnotationCollector {
+  public:
+    ClassAnnotationCollector() : AnnotationCollector(_in_class) { }
+    void apply_to(instanceKlassHandle k);
+  };
 
   enum { fixed_buffer_size = 128 };
   u_char linenumbertable_buffer[fixed_buffer_size];
@@ -87,7 +156,9 @@
                               u2* constantvalue_index_addr,
                               bool* is_synthetic_addr,
                               u2* generic_signature_index_addr,
-                              typeArrayHandle* field_annotations, TRAPS);
+                              typeArrayHandle* field_annotations,
+                              FieldAnnotationCollector* parsed_annotations,
+                              TRAPS);
   typeArrayHandle parse_fields(Symbol* class_name,
                                constantPoolHandle cp, bool is_interface,
                                FieldAllocationCount *fac,
@@ -128,25 +199,32 @@
   typeArrayOop parse_stackmap_table(u4 code_attribute_length, TRAPS);
 
   // Classfile attribute parsing
-  void parse_classfile_sourcefile_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_source_debug_extension_attribute(constantPoolHandle cp,
-                                                instanceKlassHandle k, int length, TRAPS);
+  void parse_classfile_sourcefile_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_source_debug_extension_attribute(constantPoolHandle cp, int length, TRAPS);
   u2   parse_classfile_inner_classes_attribute(u1* inner_classes_attribute_start,
                                                bool parsed_enclosingmethod_attribute,
                                                u2 enclosing_method_class_index,
                                                u2 enclosing_method_method_index,
                                                constantPoolHandle cp,
-                                               instanceKlassHandle k, TRAPS);
-  void parse_classfile_attributes(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_synthetic_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_signature_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, instanceKlassHandle k, u4 attribute_length, TRAPS);
+                                               TRAPS);
+  void parse_classfile_attributes(constantPoolHandle cp,
+                                  ClassAnnotationCollector* parsed_annotations,
+                                  TRAPS);
+  void parse_classfile_synthetic_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_signature_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, u4 attribute_length, TRAPS);
 
   // Annotations handling
   typeArrayHandle assemble_annotations(u1* runtime_visible_annotations,
                                        int runtime_visible_annotations_length,
                                        u1* runtime_invisible_annotations,
                                        int runtime_invisible_annotations_length, TRAPS);
+  int skip_annotation(u1* buffer, int limit, int index);
+  int skip_annotation_value(u1* buffer, int limit, int index);
+  void parse_annotations(u1* buffer, int limit, constantPoolHandle cp,
+                         /* Results (currently, only one result is supported): */
+                         AnnotationCollector* result,
+                         TRAPS);
 
   // Final setup
   unsigned int compute_oop_map_count(instanceKlassHandle super,
--- a/src/share/vm/classfile/javaClasses.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/classfile/javaClasses.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -2738,17 +2738,6 @@
   if (k != NULL) {
     compute_offset(_target_offset, k, vmSymbols::target_name(), vmSymbols::java_lang_invoke_MethodHandle_signature());
   }
-
-  // Disallow compilation of CallSite.setTargetNormal and CallSite.setTargetVolatile
-  // (For C2:  keep this until we have throttling logic for uncommon traps.)
-  if (k != NULL) {
-    instanceKlass* ik = instanceKlass::cast(k);
-    methodOop m_normal   = ik->lookup_method(vmSymbols::setTargetNormal_name(),   vmSymbols::setTarget_signature());
-    methodOop m_volatile = ik->lookup_method(vmSymbols::setTargetVolatile_name(), vmSymbols::setTarget_signature());
-    guarantee(m_normal != NULL && m_volatile != NULL, "must exist");
-    m_normal->set_not_compilable_quietly();
-    m_volatile->set_not_compilable_quietly();
-  }
 }
 
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/classfile/vmSymbols.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -257,6 +257,7 @@
   template(java_lang_invoke_BoundMethodHandle,        "java/lang/invoke/BoundMethodHandle")       \
   template(java_lang_invoke_DirectMethodHandle,       "java/lang/invoke/DirectMethodHandle")      \
   template(java_lang_invoke_CountingMethodHandle,     "java/lang/invoke/CountingMethodHandle")    \
+  template(java_lang_invoke_ForceInline_signature,    "Ljava/lang/invoke/ForceInline;")           \
   /* internal up-calls made only by the JVM, via class sun.invoke.MethodHandleNatives: */         \
   template(findMethodHandleType_name,                 "findMethodHandleType")                     \
   template(findMethodHandleType_signature,       "(Ljava/lang/Class;[Ljava/lang/Class;)Ljava/lang/invoke/MethodType;") \
--- a/src/share/vm/code/vmreg.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/code/vmreg.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -131,6 +131,10 @@
     assert((is_reg() && value() < stack0->value() - 1) || is_stack(), "must be");
     return (VMReg)(intptr_t)(value() + 1);
   }
+  VMReg next(int i) {
+    assert((is_reg() && value() < stack0->value() - i) || is_stack(), "must be");
+    return (VMReg)(intptr_t)(value() + i);
+  }
   VMReg prev() {
     assert((is_stack() && value() > stack0->value()) || (is_reg() && value() != 0), "must be");
     return (VMReg)(intptr_t)(value() - 1);
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "memory/space.inline.hpp"
@@ -500,11 +501,11 @@
 }
 
 void ConcurrentG1Refine::clear_and_record_card_counts() {
-  if (G1ConcRSLogCacheSize == 0) return;
+  if (G1ConcRSLogCacheSize == 0) {
+    return;
+  }
 
-#ifndef PRODUCT
   double start = os::elapsedTime();
-#endif
 
   if (_expand_card_counts) {
     int new_idx = _cache_size_index + 1;
@@ -523,11 +524,8 @@
   assert((this_epoch+1) <= max_jint, "to many periods");
   // Update epoch
   _n_periods++;
-
-#ifndef PRODUCT
-  double elapsed = os::elapsedTime() - start;
-  _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0);
-#endif
+  double cc_clear_time_ms = (os::elapsedTime() - start) * 1000;
+  _g1h->g1_policy()->phase_times()->record_cc_clear_time_ms(cc_clear_time_ms);
 }
 
 void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -3156,9 +3156,6 @@
       _g1h->g1_policy()->record_concurrent_pause();
     }
     cmThread()->yield();
-    if (worker_id == 0) {
-      _g1h->g1_policy()->record_concurrent_pause_end();
-    }
     return true;
   } else {
     return false;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -33,6 +33,7 @@
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1EvacFailure.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
@@ -2274,7 +2275,7 @@
   while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
   }
-  g1_policy()->record_update_rs_processed_buffers(worker_i,
+  g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i,
                                                   (double) n_completed_buffers);
   dcqs.clear_n_completed_buffers();
   assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
@@ -3633,10 +3634,10 @@
     gclog_or_tty->date_stamp(G1Log::fine() && PrintGCDateStamps);
     TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
 
-    GCCauseString gc_cause_str = GCCauseString("GC pause", gc_cause())
-      .append(g1_policy()->gcs_are_young() ? " (young)" : " (mixed)")
-      .append(g1_policy()->during_initial_mark_pause() ? " (initial-mark)" : "");
-    TraceTime t(gc_cause_str, G1Log::fine() && !G1Log::finer(), true, gclog_or_tty);
+    int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                                workers()->active_workers() : 1);
+    g1_policy()->phase_times()->note_gc_start(os::elapsedTime(), active_workers,
+      g1_policy()->gcs_are_young(), g1_policy()->during_initial_mark_pause(), gc_cause());
 
     TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
     TraceMemoryManagerStats tms(false /* fullGC */, gc_cause());
@@ -3699,9 +3700,15 @@
         // before the start GC event.
         _hr_printer.start_gc(false /* full */, (size_t) total_collections());
 
+        // This timing is only used by the ergonomics to handle our pause target.
+        // It is unclear why this should not include the full pause. We will
+        // investigate this in CR 7178365.
+        //
+        // Preserving the old comment here if that helps the investigation:
+        //
         // The elapsed time induced by the start time below deliberately elides
         // the possible verification above.
-        double start_time_sec = os::elapsedTime();
+        double sample_start_time_sec = os::elapsedTime();
         size_t start_used_bytes = used();
 
 #if YOUNG_LIST_VERBOSE
@@ -3710,7 +3717,7 @@
         g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
 #endif // YOUNG_LIST_VERBOSE
 
-        g1_policy()->record_collection_pause_start(start_time_sec,
+        g1_policy()->record_collection_pause_start(sample_start_time_sec,
                                                    start_used_bytes);
 
         double scan_wait_start = os::elapsedTime();
@@ -3719,11 +3726,12 @@
         // objects on them have been correctly scanned before we start
         // moving them during the GC.
         bool waited = _cm->root_regions()->wait_until_scan_finished();
+        double wait_time_ms = 0.0;
         if (waited) {
           double scan_wait_end = os::elapsedTime();
-          double wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
-          g1_policy()->record_root_region_scan_wait_time(wait_time_ms);
+          wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
         }
+        g1_policy()->phase_times()->record_root_region_scan_wait_time(wait_time_ms);
 
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:");
@@ -3877,12 +3885,16 @@
                                  true  /* verify_fingers */);
         _cm->note_end_of_gc();
 
-        double end_time_sec = os::elapsedTime();
-        double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
-        g1_policy()->record_pause_time_ms(pause_time_ms);
-        int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                                workers()->active_workers() : 1);
-        g1_policy()->record_collection_pause_end(active_workers);
+        // Collect thread local data to allow the ergonomics to use
+        // the collected information
+        g1_policy()->phase_times()->collapse_par_times();
+
+        // This timing is only used by the ergonomics to handle our pause target.
+        // It is unclear why this should not include the full pause. We will
+        // investigate this in CR 7178365.
+        double sample_end_time_sec = os::elapsedTime();
+        double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
+        g1_policy()->record_collection_pause_end(pause_time_ms);
 
         MemoryService::track_memory_usage();
 
@@ -3929,9 +3941,6 @@
       // RETIRE events are generated before the end GC event.
       _hr_printer.end_gc(false /* full */, (size_t) total_collections());
 
-      // We have to do this after we decide whether to expand the heap or not.
-      g1_policy()->print_heap_transition();
-
       if (mark_in_progress()) {
         concurrent_mark()->update_g1_committed();
       }
@@ -3941,13 +3950,14 @@
 #endif
 
       gc_epilogue(false);
+
+      g1_policy()->phase_times()->note_gc_end(os::elapsedTime());
+
+      // We have to do this after we decide whether to expand the heap or not.
+      g1_policy()->print_heap_transition();
     }
 
-    // The closing of the inner scope, immediately above, will complete
-    // logging at the "fine" level. The record_collection_pause_end() call
-    // above will complete logging at the "finer" level.
-    //
-    // It is not yet to safe, however, to tell the concurrent mark to
+    // It is not yet to safe to tell the concurrent mark to
     // start as we have some optional output below. We don't want the
     // output from the concurrent mark thread interfering with this
     // logging output either.
@@ -4695,7 +4705,7 @@
     if (worker_id >= _n_workers) return;  // no work needed this round
 
     double start_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
+    _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms);
 
     {
       ResourceMark rm;
@@ -4744,8 +4754,8 @@
         evac.do_void();
         double elapsed_ms = (os::elapsedTime()-start)*1000.0;
         double term_ms = pss.term_time()*1000.0;
-        _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
-        _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
+        _g1h->g1_policy()->phase_times()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
+        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts());
       }
       _g1h->g1_policy()->record_thread_age_table(pss.age_table());
       _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
@@ -4763,7 +4773,7 @@
     }
 
     double end_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
+    _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms);
   }
 };
 
@@ -4874,15 +4884,15 @@
 
   double ext_roots_end = os::elapsedTime();
 
-  g1_policy()->reset_obj_copy_time(worker_i);
+  g1_policy()->phase_times()->reset_obj_copy_time(worker_i);
   double obj_copy_time_sec = buf_scan_perm.closure_app_seconds() +
                                 buf_scan_non_heap_roots.closure_app_seconds();
-  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
+  g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
 
   double ext_root_time_ms =
     ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
 
-  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
+  g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
 
   // During conc marking we have to filter the per-thread SATB buffers
   // to make sure we remove any oops into the CSet (which will show up
@@ -4893,7 +4903,7 @@
     }
   }
   double satb_filtering_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_satb_filtering_time(worker_i, satb_filtering_ms);
+  g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
@@ -5393,7 +5403,7 @@
   assert(pss.refs()->is_empty(), "both queue and overflow should be empty");
 
   double ref_proc_time = os::elapsedTime() - ref_proc_start;
-  g1_policy()->record_ref_proc_time(ref_proc_time * 1000.0);
+  g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0);
 }
 
 // Weak Reference processing during an evacuation pause (part 2).
@@ -5430,7 +5440,7 @@
   // and could signicantly increase the pause time.
 
   double ref_enq_time = os::elapsedTime() - ref_enq_start;
-  g1_policy()->record_ref_enq_time(ref_enq_time * 1000.0);
+  g1_policy()->phase_times()->record_ref_enq_time(ref_enq_time * 1000.0);
 }
 
 void G1CollectedHeap::evacuate_collection_set() {
@@ -5493,11 +5503,11 @@
   }
 
   double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
-  g1_policy()->record_par_time(par_time_ms);
+  g1_policy()->phase_times()->record_par_time(par_time_ms);
 
   double code_root_fixup_time_ms =
         (os::elapsedTime() - end_par_time_sec) * 1000.0;
-  g1_policy()->record_code_root_fixup_time(code_root_fixup_time_ms);
+  g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms);
 
   set_par_threads(0);
 
@@ -5759,7 +5769,7 @@
   }
 
   double elapsed = os::elapsedTime() - start;
-  g1_policy()->record_clear_ct_time(elapsed * 1000.0);
+  g1_policy()->phase_times()->record_clear_ct_time(elapsed * 1000.0);
 }
 
 void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) {
@@ -5868,8 +5878,8 @@
                                     NULL /* old_proxy_set */,
                                     NULL /* humongous_proxy_set */,
                                     false /* par */);
-  policy->record_young_free_cset_time_ms(young_time_ms);
-  policy->record_non_young_free_cset_time_ms(non_young_time_ms);
+  policy->phase_times()->record_young_free_cset_time_ms(young_time_ms);
+  policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }
 
 // This routine is similar to the above but does not record
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -29,6 +29,7 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/shared/gcPolicyCounters.hpp"
@@ -77,57 +78,6 @@
   1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30
 };
 
-// Help class for avoiding interleaved logging
-class LineBuffer: public StackObj {
-
-private:
-  static const int BUFFER_LEN = 1024;
-  static const int INDENT_CHARS = 3;
-  char _buffer[BUFFER_LEN];
-  int _indent_level;
-  int _cur;
-
-  void vappend(const char* format, va_list ap) {
-    int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
-    if (res != -1) {
-      _cur += res;
-    } else {
-      DEBUG_ONLY(warning("buffer too small in LineBuffer");)
-      _buffer[BUFFER_LEN -1] = 0;
-      _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
-    }
-  }
-
-public:
-  explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
-    for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
-      _buffer[_cur] = ' ';
-    }
-  }
-
-#ifndef PRODUCT
-  ~LineBuffer() {
-    assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
-  }
-#endif
-
-  void append(const char* format, ...) {
-    va_list ap;
-    va_start(ap, format);
-    vappend(format, ap);
-    va_end(ap);
-  }
-
-  void append_and_print_cr(const char* format, ...) {
-    va_list ap;
-    va_start(ap, format);
-    vappend(format, ap);
-    va_end(ap);
-    gclog_or_tty->print_cr("%s", _buffer);
-    _cur = _indent_level * INDENT_CHARS;
-  }
-};
-
 G1CollectorPolicy::G1CollectorPolicy() :
   _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
                         ? ParallelGCThreads : 1),
@@ -135,20 +85,6 @@
   _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _stop_world_start(0.0),
 
-  _cur_clear_ct_time_ms(0.0),
-  _root_region_scan_wait_time_ms(0.0),
-
-  _cur_ref_proc_time_ms(0.0),
-  _cur_ref_enq_time_ms(0.0),
-
-#ifndef PRODUCT
-  _min_clear_cc_time_ms(-1.0),
-  _max_clear_cc_time_ms(-1.0),
-  _cur_clear_cc_time_ms(0.0),
-  _cum_clear_cc_time_ms(0.0),
-  _num_cc_clears(0L),
-#endif
-
   _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
 
@@ -257,30 +193,9 @@
   _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
   _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
 
-  _par_last_gc_worker_start_times_ms = new double[_parallel_gc_threads];
-  _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
-  _par_last_satb_filtering_times_ms = new double[_parallel_gc_threads];
-
-  _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
-  _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
-
-  _par_last_scan_rs_times_ms = new double[_parallel_gc_threads];
-
-  _par_last_obj_copy_times_ms = new double[_parallel_gc_threads];
+  _phase_times = new G1GCPhaseTimes(_parallel_gc_threads);
 
-  _par_last_termination_times_ms = new double[_parallel_gc_threads];
-  _par_last_termination_attempts = new double[_parallel_gc_threads];
-  _par_last_gc_worker_end_times_ms = new double[_parallel_gc_threads];
-  _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
-  _par_last_gc_worker_other_times_ms = new double[_parallel_gc_threads];
-
-  int index;
-  if (ParallelGCThreads == 0)
-    index = 0;
-  else if (ParallelGCThreads > 8)
-    index = 7;
-  else
-    index = ParallelGCThreads - 1;
+  int index = MIN2(_parallel_gc_threads - 1, 7);
 
   _pending_card_diff_seq->add(0.0);
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
@@ -824,7 +739,7 @@
 #endif // PRODUCT
 
 void G1CollectorPolicy::record_full_collection_start() {
-  _cur_collection_start_sec = os::elapsedTime();
+  _full_collection_start_sec = os::elapsedTime();
   // Release the future to-space so that it is available for compaction into.
   _g1->set_full_collection();
 }
@@ -833,7 +748,7 @@
   // Consider this like a collection pause for the purposes of allocation
   // since last pause.
   double end_sec = os::elapsedTime();
-  double full_gc_time_sec = end_sec - _cur_collection_start_sec;
+  double full_gc_time_sec = end_sec - _full_collection_start_sec;
   double full_gc_time_ms = full_gc_time_sec * 1000.0;
 
   _trace_gen1_time_data.record_full_collection(full_gc_time_ms);
@@ -869,12 +784,6 @@
 
 void G1CollectorPolicy::record_collection_pause_start(double start_time_sec,
                                                       size_t start_used) {
-  if (G1Log::finer()) {
-    gclog_or_tty->stamp(PrintGCTimeStamps);
-    gclog_or_tty->print("[%s", (const char*)GCCauseString("GC pause", _g1->gc_cause())
-      .append(gcs_are_young() ? " (young)" : " (mixed)"));
-  }
-
   // We only need to do this here as the policy will only be applied
   // to the GC we're about to start. so, no point is calculating this
   // every time we calculate / recalculate the target young length.
@@ -888,7 +797,7 @@
   _trace_gen0_time_data.record_start_collection(s_w_t_ms);
   _stop_world_start = 0.0;
 
-  _cur_collection_start_sec = start_time_sec;
+  phase_times()->_cur_collection_start_sec = start_time_sec;
   _cur_collection_pause_used_at_start_bytes = start_used;
   _cur_collection_pause_used_regions_at_start = _g1->used_regions();
   _pending_cards = _g1->pending_card_num();
@@ -902,30 +811,6 @@
   _survivor_bytes_before_gc = young_list->survivor_used_bytes();
   _capacity_before_gc = _g1->capacity();
 
-#ifdef DEBUG
-  // initialise these to something well known so that we can spot
-  // if they are not set properly
-
-  for (int i = 0; i < _parallel_gc_threads; ++i) {
-    _par_last_gc_worker_start_times_ms[i] = -1234.0;
-    _par_last_ext_root_scan_times_ms[i] = -1234.0;
-    _par_last_satb_filtering_times_ms[i] = -1234.0;
-    _par_last_update_rs_times_ms[i] = -1234.0;
-    _par_last_update_rs_processed_buffers[i] = -1234.0;
-    _par_last_scan_rs_times_ms[i] = -1234.0;
-    _par_last_obj_copy_times_ms[i] = -1234.0;
-    _par_last_termination_times_ms[i] = -1234.0;
-    _par_last_termination_attempts[i] = -1234.0;
-    _par_last_gc_worker_end_times_ms[i] = -1234.0;
-    _par_last_gc_worker_times_ms[i] = -1234.0;
-    _par_last_gc_worker_other_times_ms[i] = -1234.0;
-  }
-#endif
-
-  // This is initialized to zero here and is set during the evacuation
-  // pause if we actually waited for the root region scanning to finish.
-  _root_region_scan_wait_time_ms = 0.0;
-
   _last_gc_was_young = false;
 
   // do that for any other surv rate groups
@@ -974,127 +859,6 @@
   }
 }
 
-void G1CollectorPolicy::record_concurrent_pause_end() {
-}
-
-template<class T>
-T sum_of(T* sum_arr, int start, int n, int N) {
-  T sum = (T)0;
-  for (int i = 0; i < n; i++) {
-    int j = (start + i) % N;
-    sum += sum_arr[j];
-  }
-  return sum;
-}
-
-void G1CollectorPolicy::print_par_stats(int level,
-                                        const char* str,
-                                        double* data,
-                                        bool showDecimals) {
-  double min = data[0], max = data[0];
-  double total = 0.0;
-  LineBuffer buf(level);
-  buf.append("[%s (ms):", str);
-  for (uint i = 0; i < no_of_gc_threads(); ++i) {
-    double val = data[i];
-    if (val < min)
-      min = val;
-    if (val > max)
-      max = val;
-    total += val;
-    if (G1Log::finest()) {
-      if (showDecimals) {
-        buf.append("  %.1lf", val);
-      } else {
-        buf.append("  %d", (int)val);
-      }
-    }
-  }
-
-  if (G1Log::finest()) {
-    buf.append_and_print_cr("");
-  }
-  double avg = total / (double) no_of_gc_threads();
-  if (showDecimals) {
-    buf.append_and_print_cr(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf, Sum: %.1lf]",
-      min, avg, max, max - min, total);
-  } else {
-    buf.append_and_print_cr(" Min: %d, Avg: %d, Max: %d, Diff: %d, Sum: %d]",
-      (int)min, (int)avg, (int)max, (int)max - (int)min, (int)total);
-  }
-}
-
-void G1CollectorPolicy::print_stats(int level,
-                                    const char* str,
-                                    double value) {
-  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
-}
-
-void G1CollectorPolicy::print_stats(int level,
-                                    const char* str,
-                                    double value,
-                                    int workers) {
-  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %d]", str, value, workers);
-}
-
-void G1CollectorPolicy::print_stats(int level,
-                                    const char* str,
-                                    int value) {
-  LineBuffer(level).append_and_print_cr("[%s: %d]", str, value);
-}
-
-double G1CollectorPolicy::avg_value(double* data) {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    double ret = 0.0;
-    for (uint i = 0; i < no_of_gc_threads(); ++i) {
-      ret += data[i];
-    }
-    return ret / (double) no_of_gc_threads();
-  } else {
-    return data[0];
-  }
-}
-
-double G1CollectorPolicy::max_value(double* data) {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    double ret = data[0];
-    for (uint i = 1; i < no_of_gc_threads(); ++i) {
-      if (data[i] > ret) {
-        ret = data[i];
-      }
-    }
-    return ret;
-  } else {
-    return data[0];
-  }
-}
-
-double G1CollectorPolicy::sum_of_values(double* data) {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    double sum = 0.0;
-    for (uint i = 0; i < no_of_gc_threads(); i++) {
-      sum += data[i];
-    }
-    return sum;
-  } else {
-    return data[0];
-  }
-}
-
-double G1CollectorPolicy::max_sum(double* data1, double* data2) {
-  double ret = data1[0] + data2[0];
-
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    for (uint i = 1; i < no_of_gc_threads(); ++i) {
-      double data = data1[i] + data2[i];
-      if (data > ret) {
-        ret = data;
-      }
-    }
-  }
-  return ret;
-}
-
 bool G1CollectorPolicy::need_to_start_conc_mark(const char* source, size_t alloc_word_size) {
   if (_g1->concurrent_mark()->cmThread()->during_cycle()) {
     return false;
@@ -1142,10 +906,8 @@
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
+void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms) {
   double end_time_sec = os::elapsedTime();
-  double elapsed_ms = _last_pause_time_ms;
-  bool parallel = G1CollectedHeap::use_parallel_gc_threads();
   assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(),
          "otherwise, the subtraction below does not make sense");
   size_t rs_size =
@@ -1154,7 +916,6 @@
   assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
   bool last_pause_included_initial_mark = false;
   bool update_stats = !_g1->evacuation_failed();
-  set_no_of_gc_threads(no_of_gc_threads);
 
 #ifndef PRODUCT
   if (G1YoungSurvRateVerbose) {
@@ -1174,7 +935,7 @@
     set_initiate_conc_mark_if_possible();
   }
 
-  _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
+  _mmu_tracker->add_pause(end_time_sec - pause_time_ms/1000.0,
                           end_time_sec, false);
 
   size_t freed_bytes =
@@ -1185,58 +946,11 @@
     (double)surviving_bytes/
     (double)_collection_set_bytes_used_before;
 
-  // These values are used to update the summary information that is
-  // displayed when TraceGen0Time is enabled, and are output as part
-  // of the "finer" output, in the non-parallel case.
-
-  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
-  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
-  double update_rs_processed_buffers =
-    sum_of_values(_par_last_update_rs_processed_buffers);
-  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
-  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
-  double termination_time = avg_value(_par_last_termination_times_ms);
-
-  double known_time = ext_root_scan_time +
-                      satb_filtering_time +
-                      update_rs_time +
-                      scan_rs_time +
-                      obj_copy_time;
-
-  double other_time_ms = elapsed_ms;
-
-  // Subtract the root region scanning wait time. It's initialized to
-  // zero at the start of the pause.
-  other_time_ms -= _root_region_scan_wait_time_ms;
-
-  if (parallel) {
-    other_time_ms -= _cur_collection_par_time_ms;
-  } else {
-    other_time_ms -= known_time;
-  }
-
-  // Now subtract the time taken to fix up roots in generated code
-  other_time_ms -= _cur_collection_code_root_fixup_time_ms;
-
-  // Subtract the time taken to clean the card table from the
-  // current value of "other time"
-  other_time_ms -= _cur_clear_ct_time_ms;
-
-  // TraceGen0Time and TraceGen1Time summary info updating.
-
   if (update_stats) {
-    double parallel_known_time = known_time + termination_time;
-    double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
-
-    _trace_gen0_time_data.record_end_collection(
-      elapsed_ms, other_time_ms, _root_region_scan_wait_time_ms, _cur_collection_par_time_ms,
-      ext_root_scan_time, satb_filtering_time, update_rs_time, scan_rs_time, obj_copy_time,
-      termination_time, parallel_other_time, _cur_clear_ct_time_ms);
-
+    _trace_gen0_time_data.record_end_collection(pause_time_ms, phase_times());
     // this is where we update the allocation rate of the application
     double app_time_ms =
-      (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms);
+      (phase_times()->_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms);
     if (app_time_ms < MIN_TIMER_GRANULARITY) {
       // This usually happens due to the timer not having the required
       // granularity. Some Linuxes are the usual culprits.
@@ -1257,7 +971,7 @@
 
     double interval_ms =
       (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0;
-    update_recent_gc_times(end_time_sec, elapsed_ms);
+    update_recent_gc_times(end_time_sec, pause_time_ms);
     _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms;
     if (recent_avg_pause_time_ratio() < 0.0 ||
         (recent_avg_pause_time_ratio() - 1.0 > 0.0)) {
@@ -1284,90 +998,6 @@
       }
     }
   }
-
-  if (G1Log::finer()) {
-    bool print_marking_info =
-      _g1->mark_in_progress() && !last_pause_included_initial_mark;
-
-    gclog_or_tty->print_cr("%s, %1.8lf secs]",
-                           (last_pause_included_initial_mark) ? " (initial-mark)" : "",
-                           elapsed_ms / 1000.0);
-
-    if (_root_region_scan_wait_time_ms > 0.0) {
-      print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
-    }
-    if (parallel) {
-      print_stats(1, "Parallel Time", _cur_collection_par_time_ms, no_of_gc_threads);
-      print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
-      print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
-      if (print_marking_info) {
-        print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
-      }
-      print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
-      if (G1Log::finest()) {
-        print_par_stats(3, "Processed Buffers", _par_last_update_rs_processed_buffers,
-          false /* showDecimals */);
-      }
-      print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
-      print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
-      print_par_stats(2, "Termination", _par_last_termination_times_ms);
-      if (G1Log::finest()) {
-        print_par_stats(3, "Termination Attempts", _par_last_termination_attempts,
-          false /* showDecimals */);
-      }
-
-      for (int i = 0; i < _parallel_gc_threads; i++) {
-        _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] -
-                                          _par_last_gc_worker_start_times_ms[i];
-
-        double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
-                                   _par_last_satb_filtering_times_ms[i] +
-                                   _par_last_update_rs_times_ms[i] +
-                                   _par_last_scan_rs_times_ms[i] +
-                                   _par_last_obj_copy_times_ms[i] +
-                                   _par_last_termination_times_ms[i];
-
-        _par_last_gc_worker_other_times_ms[i] = _par_last_gc_worker_times_ms[i] -
-                                                worker_known_time;
-      }
-
-      print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
-      print_par_stats(2, "GC Worker Total", _par_last_gc_worker_times_ms);
-      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
-    } else {
-      print_stats(1, "Ext Root Scanning", ext_root_scan_time);
-      if (print_marking_info) {
-        print_stats(1, "SATB Filtering", satb_filtering_time);
-      }
-      print_stats(1, "Update RS", update_rs_time);
-      if (G1Log::finest()) {
-        print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
-      }
-      print_stats(1, "Scan RS", scan_rs_time);
-      print_stats(1, "Object Copying", obj_copy_time);
-    }
-    print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
-    print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
-#ifndef PRODUCT
-    print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
-    print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
-    print_stats(1, "Min Clear CC", _min_clear_cc_time_ms);
-    print_stats(1, "Max Clear CC", _max_clear_cc_time_ms);
-    if (_num_cc_clears > 0) {
-      print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears));
-    }
-#endif
-    print_stats(1, "Other", other_time_ms);
-    print_stats(2, "Choose CSet",
-                   (_recorded_young_cset_choice_time_ms +
-                    _recorded_non_young_cset_choice_time_ms));
-    print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
-    print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
-    print_stats(2, "Free CSet",
-                   (_recorded_young_free_cset_time_ms +
-                    _recorded_non_young_free_cset_time_ms));
-  }
-
   bool new_in_marking_window = _in_marking_window;
   bool new_in_marking_window_im = false;
   if (during_initial_mark_pause()) {
@@ -1406,8 +1036,6 @@
   // do that for any other surv rate groupsx
 
   if (update_stats) {
-    double pause_time_ms = elapsed_ms;
-
     size_t diff = 0;
     if (_max_pending_cards >= _pending_cards) {
       diff = _max_pending_cards - _pending_cards;
@@ -1416,7 +1044,7 @@
 
     double cost_per_card_ms = 0.0;
     if (_pending_cards > 0) {
-      cost_per_card_ms = update_rs_time / (double) _pending_cards;
+      cost_per_card_ms = phase_times()->_update_rs_time / (double) _pending_cards;
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }
 
@@ -1424,7 +1052,7 @@
 
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
-      cost_per_entry_ms = scan_rs_time / (double) cards_scanned;
+      cost_per_entry_ms = phase_times()->_scan_rs_time / (double) cards_scanned;
       if (_last_gc_was_young) {
         _cost_per_entry_ms_seq->add(cost_per_entry_ms);
       } else {
@@ -1464,7 +1092,7 @@
     size_t copied_bytes = surviving_bytes;
     double cost_per_byte_ms = 0.0;
     if (copied_bytes > 0) {
-      cost_per_byte_ms = obj_copy_time / (double) copied_bytes;
+      cost_per_byte_ms = phase_times()->_obj_copy_time / (double) copied_bytes;
       if (_in_marking_window) {
         _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
       } else {
@@ -1473,21 +1101,21 @@
     }
 
     double all_other_time_ms = pause_time_ms -
-      (update_rs_time + scan_rs_time + obj_copy_time + termination_time);
+      (phase_times()->_update_rs_time + phase_times()->_scan_rs_time + phase_times()->_obj_copy_time + phase_times()->_termination_time);
 
     double young_other_time_ms = 0.0;
     if (young_cset_region_length() > 0) {
       young_other_time_ms =
-        _recorded_young_cset_choice_time_ms +
-        _recorded_young_free_cset_time_ms;
+        phase_times()->_recorded_young_cset_choice_time_ms +
+        phase_times()->_recorded_young_free_cset_time_ms;
       _young_other_cost_per_region_ms_seq->add(young_other_time_ms /
                                           (double) young_cset_region_length());
     }
     double non_young_other_time_ms = 0.0;
     if (old_cset_region_length() > 0) {
       non_young_other_time_ms =
-        _recorded_non_young_cset_choice_time_ms +
-        _recorded_non_young_free_cset_time_ms;
+        phase_times()->_recorded_non_young_cset_choice_time_ms +
+        phase_times()->_recorded_non_young_free_cset_time_ms;
 
       _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms /
                                             (double) old_cset_region_length());
@@ -1514,7 +1142,7 @@
 
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
-  adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms);
+  adjust_concurrent_refinement(phase_times()->_update_rs_time, phase_times()->_update_rs_processed_buffers, update_rs_time_goal_ms);
 
   _collectionSetChooser->verify();
 }
@@ -2323,7 +1951,7 @@
   set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths);
 
   double young_end_time_sec = os::elapsedTime();
-  _recorded_young_cset_choice_time_ms =
+  phase_times()->_recorded_young_cset_choice_time_ms =
     (young_end_time_sec - young_start_time_sec) * 1000.0;
 
   // We are doing young collections so reset this.
@@ -2439,7 +2067,7 @@
                 predicted_pause_time_ms, target_pause_time_ms);
 
   double non_young_end_time_sec = os::elapsedTime();
-  _recorded_non_young_cset_choice_time_ms =
+  phase_times()->_recorded_non_young_cset_choice_time_ms =
     (non_young_end_time_sec - non_young_start_time_sec) * 1000.0;
 }
 
@@ -2455,33 +2083,29 @@
   }
 }
 
-void TraceGen0TimeData::record_end_collection(
-     double total_ms,
-     double other_ms,
-     double root_region_scan_wait_ms,
-     double parallel_ms,
-     double ext_root_scan_ms,
-     double satb_filtering_ms,
-     double update_rs_ms,
-     double scan_rs_ms,
-     double obj_copy_ms,
-     double termination_ms,
-     double parallel_other_ms,
-     double clear_ct_ms)
-{
+void TraceGen0TimeData::record_end_collection(double pause_time_ms, G1GCPhaseTimes* phase_times) {
   if(TraceGen0Time) {
-    _total.add(total_ms);
-    _other.add(other_ms);
-    _root_region_scan_wait.add(root_region_scan_wait_ms);
-    _parallel.add(parallel_ms);
-    _ext_root_scan.add(ext_root_scan_ms);
-    _satb_filtering.add(satb_filtering_ms);
-    _update_rs.add(update_rs_ms);
-    _scan_rs.add(scan_rs_ms);
-    _obj_copy.add(obj_copy_ms);
-    _termination.add(termination_ms);
-    _parallel_other.add(parallel_other_ms);
-    _clear_ct.add(clear_ct_ms);
+    _total.add(pause_time_ms);
+    _other.add(pause_time_ms - phase_times->accounted_time_ms());
+    _root_region_scan_wait.add(phase_times->_root_region_scan_wait_time_ms);
+    _parallel.add(phase_times->_cur_collection_par_time_ms);
+    _ext_root_scan.add(phase_times->_ext_root_scan_time);
+    _satb_filtering.add(phase_times->_satb_filtering_time);
+    _update_rs.add(phase_times->_update_rs_time);
+    _scan_rs.add(phase_times->_scan_rs_time);
+    _obj_copy.add(phase_times->_obj_copy_time);
+    _termination.add(phase_times->_termination_time);
+
+    double parallel_known_time = phase_times->_ext_root_scan_time +
+      phase_times->_satb_filtering_time +
+      phase_times->_update_rs_time +
+      phase_times->_scan_rs_time +
+      phase_times->_obj_copy_time +
+      + phase_times->_termination_time;
+
+    double parallel_other_time = phase_times->_cur_collection_par_time_ms - parallel_known_time;
+    _parallel_other.add(parallel_other_time);
+    _clear_ct.add(phase_times->_cur_clear_ct_time_ms);
   }
 }
 
@@ -2497,20 +2121,18 @@
   }
 }
 
-void TraceGen0TimeData::print_summary(int level,
-                                      const char* str,
+void TraceGen0TimeData::print_summary(const char* str,
                                       const NumberSeq* seq) const {
   double sum = seq->sum();
-  LineBuffer(level + 1).append_and_print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)",
+  gclog_or_tty->print_cr("%-27s = %8.2lf s (avg = %8.2lf ms)",
                 str, sum / 1000.0, seq->avg());
 }
 
-void TraceGen0TimeData::print_summary_sd(int level,
-                                         const char* str,
+void TraceGen0TimeData::print_summary_sd(const char* str,
                                          const NumberSeq* seq) const {
-  print_summary(level, str, seq);
-  LineBuffer(level + 6).append_and_print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
-                seq->num(), seq->sd(), seq->maximum());
+  print_summary(str, seq);
+  gclog_or_tty->print_cr("%+45s = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
+                "(num", seq->num(), seq->sd(), seq->maximum());
 }
 
 void TraceGen0TimeData::print() const {
@@ -2519,7 +2141,7 @@
   }
 
   gclog_or_tty->print_cr("ALL PAUSES");
-  print_summary_sd(0, "Total", &_total);
+  print_summary_sd("   Total", &_total);
   gclog_or_tty->print_cr("");
   gclog_or_tty->print_cr("");
   gclog_or_tty->print_cr("   Young GC Pauses: %8d", _young_pause_num);
@@ -2531,24 +2153,24 @@
   if (_young_pause_num == 0 && _mixed_pause_num == 0) {
     gclog_or_tty->print_cr("none");
   } else {
-    print_summary_sd(0, "Evacuation Pauses", &_total);
-    print_summary(1, "Root Region Scan Wait", &_root_region_scan_wait);
-    print_summary(1, "Parallel Time", &_parallel);
-    print_summary(2, "Ext Root Scanning", &_ext_root_scan);
-    print_summary(2, "SATB Filtering", &_satb_filtering);
-    print_summary(2, "Update RS", &_update_rs);
-    print_summary(2, "Scan RS", &_scan_rs);
-    print_summary(2, "Object Copy", &_obj_copy);
-    print_summary(2, "Termination", &_termination);
-    print_summary(2, "Parallel Other", &_parallel_other);
-    print_summary(1, "Clear CT", &_clear_ct);
-    print_summary(1, "Other", &_other);
+    print_summary_sd("   Evacuation Pauses", &_total);
+    print_summary("      Root Region Scan Wait", &_root_region_scan_wait);
+    print_summary("      Parallel Time", &_parallel);
+    print_summary("         Ext Root Scanning", &_ext_root_scan);
+    print_summary("         SATB Filtering", &_satb_filtering);
+    print_summary("         Update RS", &_update_rs);
+    print_summary("         Scan RS", &_scan_rs);
+    print_summary("         Object Copy", &_obj_copy);
+    print_summary("         Termination", &_termination);
+    print_summary("         Parallel Other", &_parallel_other);
+    print_summary("      Clear CT", &_clear_ct);
+    print_summary("      Other", &_other);
   }
   gclog_or_tty->print_cr("");
 
   gclog_or_tty->print_cr("MISC");
-  print_summary_sd(0, "Stop World", &_all_stop_world_times_ms);
-  print_summary_sd(0, "Yields", &_all_yield_times_ms);
+  print_summary_sd("   Stop World", &_all_stop_world_times_ms);
+  print_summary_sd("   Yields", &_all_yield_times_ms);
 }
 
 void TraceGen1TimeData::record_full_collection(double full_gc_time_ms) {
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -36,6 +36,7 @@
 
 class HeapRegion;
 class CollectionSetChooser;
+class G1GCPhaseTimes;
 
 // TraceGen0Time collects data on _both_ young and mixed evacuation pauses
 // (the latter may contain non-young regions - i.e. regions that are
@@ -61,26 +62,14 @@
   NumberSeq _parallel_other;
   NumberSeq _clear_ct;
 
-  void print_summary (int level, const char* str, const NumberSeq* seq) const;
-  void print_summary_sd (int level, const char* str, const NumberSeq* seq) const;
+  void print_summary(const char* str, const NumberSeq* seq) const;
+  void print_summary_sd(const char* str, const NumberSeq* seq) const;
 
 public:
    TraceGen0TimeData() : _young_pause_num(0), _mixed_pause_num(0) {};
   void record_start_collection(double time_to_stop_the_world_ms);
   void record_yield_time(double yield_time_ms);
-  void record_end_collection(
-     double total_ms,
-     double other_ms,
-     double root_region_scan_wait_ms,
-     double parallel_ms,
-     double ext_root_scan_ms,
-     double satb_filtering_ms,
-     double update_rs_ms,
-     double scan_rs_ms,
-     double obj_copy_ms,
-     double termination_ms,
-     double parallel_other_ms,
-     double clear_ct_ms);
+  void record_end_collection(double pause_time_ms, G1GCPhaseTimes* phase_times);
   void increment_young_collection_count();
   void increment_mixed_collection_count();
   void print() const;
@@ -186,25 +175,9 @@
 
   CollectionSetChooser* _collectionSetChooser;
 
-  double _cur_collection_start_sec;
+  double _full_collection_start_sec;
   size_t _cur_collection_pause_used_at_start_bytes;
   uint   _cur_collection_pause_used_regions_at_start;
-  double _cur_collection_par_time_ms;
-
-  double _cur_collection_code_root_fixup_time_ms;
-
-  double _cur_clear_ct_time_ms;
-  double _cur_ref_proc_time_ms;
-  double _cur_ref_enq_time_ms;
-
-#ifndef PRODUCT
-  // Card Table Count Cache stats
-  double _min_clear_cc_time_ms;         // min
-  double _max_clear_cc_time_ms;         // max
-  double _cur_clear_cc_time_ms;         // clearing time during current pause
-  double _cum_clear_cc_time_ms;         // cummulative clearing time
-  jlong  _num_cc_clears;                // number of times the card count cache has been cleared
-#endif
 
   // These exclude marking times.
   TruncatedSeq* _recent_gc_times_ms;
@@ -217,23 +190,6 @@
 
   double _stop_world_start;
 
-  double* _par_last_gc_worker_start_times_ms;
-  double* _par_last_ext_root_scan_times_ms;
-  double* _par_last_satb_filtering_times_ms;
-  double* _par_last_update_rs_times_ms;
-  double* _par_last_update_rs_processed_buffers;
-  double* _par_last_scan_rs_times_ms;
-  double* _par_last_obj_copy_times_ms;
-  double* _par_last_termination_times_ms;
-  double* _par_last_termination_attempts;
-  double* _par_last_gc_worker_end_times_ms;
-  double* _par_last_gc_worker_times_ms;
-
-  // Each workers 'other' time i.e. the elapsed time of the parallel
-  // code executed by a worker minus the sum of the individual sub-phase
-  // times for that worker thread.
-  double* _par_last_gc_worker_other_times_ms;
-
   // indicates whether we are in young or mixed GC mode
   bool _gcs_are_young;
 
@@ -306,10 +262,6 @@
 
   size_t _recorded_rs_lengths;
   size_t _max_rs_lengths;
-
-  double _recorded_young_free_cset_time_ms;
-  double _recorded_non_young_free_cset_time_ms;
-
   double _sigma;
 
   size_t _rs_lengths_prediction;
@@ -341,8 +293,7 @@
   void set_no_of_gc_threads(uintx v) { _no_of_gc_threads = v; }
 
   double _pause_time_target_ms;
-  double _recorded_young_cset_choice_time_ms;
-  double _recorded_non_young_cset_choice_time_ms;
+
   size_t _pending_cards;
   size_t _max_pending_cards;
 
@@ -497,14 +448,6 @@
   uint young_cset_region_length() { return eden_cset_region_length() +
                                            survivor_cset_region_length(); }
 
-  void record_young_free_cset_time_ms(double time_ms) {
-    _recorded_young_free_cset_time_ms = time_ms;
-  }
-
-  void record_non_young_free_cset_time_ms(double time_ms) {
-    _recorded_non_young_free_cset_time_ms = time_ms;
-  }
-
   double predict_survivor_regions_evac_time();
 
   void cset_regions_freed() {
@@ -552,19 +495,6 @@
   }
 
 private:
-  void print_stats(int level, const char* str, double value);
-  void print_stats(int level, const char* str, double value, int workers);
-  void print_stats(int level, const char* str, int value);
-
-  void print_par_stats(int level, const char* str, double* data, bool showDecimals = true);
-
-  double avg_value (double* data);
-  double max_value (double* data);
-  double sum_of_values (double* data);
-  double max_sum (double* data1, double* data2);
-
-  double _last_pause_time_ms;
-
   size_t _bytes_in_collection_set_before_gc;
   size_t _bytes_copied_during_gc;
 
@@ -638,6 +568,8 @@
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1;
 
+  G1GCPhaseTimes* _phase_times;
+
   // The ratio of gc time to elapsed time, computed over recent pauses.
   double _recent_avg_pause_time_ratio;
 
@@ -677,7 +609,6 @@
   double _cur_mark_stop_world_time_ms;
   double _mark_remark_start_sec;
   double _mark_cleanup_start_sec;
-  double _root_region_scan_wait_time_ms;
 
   // Update the young list target length either by setting it to the
   // desired fixed value or by calculating it using G1's pause
@@ -728,6 +659,8 @@
     return CollectorPolicy::G1CollectorPolicyKind;
   }
 
+  G1GCPhaseTimes* phase_times() const { return _phase_times; }
+
   // Check the current value of the young list RSet lengths and
   // compare it against the last prediction. If the current value is
   // higher, recalculate the young list target length prediction.
@@ -772,10 +705,6 @@
   void record_concurrent_mark_init_end(double
                                            mark_init_elapsed_time_ms);
 
-  void record_root_region_scan_wait_time(double time_ms) {
-    _root_region_scan_wait_time_ms = time_ms;
-  }
-
   void record_concurrent_mark_remark_start();
   void record_concurrent_mark_remark_end();
 
@@ -784,97 +713,14 @@
   void record_concurrent_mark_cleanup_completed();
 
   void record_concurrent_pause();
-  void record_concurrent_pause_end();
 
-  void record_collection_pause_end(int no_of_gc_threads);
+  void record_collection_pause_end(double pause_time);
   void print_heap_transition();
 
   // Record the fact that a full collection occurred.
   void record_full_collection_start();
   void record_full_collection_end();
 
-  void record_gc_worker_start_time(int worker_i, double ms) {
-    _par_last_gc_worker_start_times_ms[worker_i] = ms;
-  }
-
-  void record_ext_root_scan_time(int worker_i, double ms) {
-    _par_last_ext_root_scan_times_ms[worker_i] = ms;
-  }
-
-  void record_satb_filtering_time(int worker_i, double ms) {
-    _par_last_satb_filtering_times_ms[worker_i] = ms;
-  }
-
-  void record_update_rs_time(int thread, double ms) {
-    _par_last_update_rs_times_ms[thread] = ms;
-  }
-
-  void record_update_rs_processed_buffers (int thread,
-                                           double processed_buffers) {
-    _par_last_update_rs_processed_buffers[thread] = processed_buffers;
-  }
-
-  void record_scan_rs_time(int thread, double ms) {
-    _par_last_scan_rs_times_ms[thread] = ms;
-  }
-
-  void reset_obj_copy_time(int thread) {
-    _par_last_obj_copy_times_ms[thread] = 0.0;
-  }
-
-  void reset_obj_copy_time() {
-    reset_obj_copy_time(0);
-  }
-
-  void record_obj_copy_time(int thread, double ms) {
-    _par_last_obj_copy_times_ms[thread] += ms;
-  }
-
-  void record_termination(int thread, double ms, size_t attempts) {
-    _par_last_termination_times_ms[thread] = ms;
-    _par_last_termination_attempts[thread] = (double) attempts;
-  }
-
-  void record_gc_worker_end_time(int worker_i, double ms) {
-    _par_last_gc_worker_end_times_ms[worker_i] = ms;
-  }
-
-  void record_pause_time_ms(double ms) {
-    _last_pause_time_ms = ms;
-  }
-
-  void record_clear_ct_time(double ms) {
-    _cur_clear_ct_time_ms = ms;
-  }
-
-  void record_par_time(double ms) {
-    _cur_collection_par_time_ms = ms;
-  }
-
-  void record_code_root_fixup_time(double ms) {
-    _cur_collection_code_root_fixup_time_ms = ms;
-  }
-
-  void record_ref_proc_time(double ms) {
-    _cur_ref_proc_time_ms = ms;
-  }
-
-  void record_ref_enq_time(double ms) {
-    _cur_ref_enq_time_ms = ms;
-  }
-
-#ifndef PRODUCT
-  void record_cc_clear_time(double ms) {
-    if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms)
-      _min_clear_cc_time_ms = ms;
-    if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms)
-      _max_clear_cc_time_ms = ms;
-    _cur_clear_cc_time_ms = ms;
-    _cum_clear_cc_time_ms += ms;
-    _num_cc_clears++;
-  }
-#endif
-
   // Record how much space we copied during a GC. This is typically
   // called when a GC alloc region is being retired.
   void record_bytes_copied_during_gc(size_t bytes) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1Log.hpp"
+
+// Helper class for avoiding interleaved logging
+class LineBuffer: public StackObj {
+
+private:
+  static const int BUFFER_LEN = 1024;
+  static const int INDENT_CHARS = 3;
+  char _buffer[BUFFER_LEN];
+  int _indent_level;
+  int _cur;
+
+  void vappend(const char* format, va_list ap) {
+    int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
+    if (res != -1) {
+      _cur += res;
+    } else {
+      DEBUG_ONLY(warning("buffer too small in LineBuffer");)
+      _buffer[BUFFER_LEN -1] = 0;
+      _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
+    }
+  }
+
+public:
+  explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
+    for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
+      _buffer[_cur] = ' ';
+    }
+  }
+
+#ifndef PRODUCT
+  ~LineBuffer() {
+    assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
+  }
+#endif
+
+  void append(const char* format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    vappend(format, ap);
+    va_end(ap);
+  }
+
+  void append_and_print_cr(const char* format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    vappend(format, ap);
+    va_end(ap);
+    gclog_or_tty->print_cr("%s", _buffer);
+    _cur = _indent_level * INDENT_CHARS;
+  }
+};
+
+G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
+  _max_gc_threads(max_gc_threads),
+  _min_clear_cc_time_ms(-1.0),
+  _max_clear_cc_time_ms(-1.0),
+  _cur_clear_cc_time_ms(0.0),
+  _cum_clear_cc_time_ms(0.0),
+  _num_cc_clears(0L)
+{
+  assert(max_gc_threads > 0, "Must have some GC threads");
+  _par_last_gc_worker_start_times_ms = new double[_max_gc_threads];
+  _par_last_ext_root_scan_times_ms = new double[_max_gc_threads];
+  _par_last_satb_filtering_times_ms = new double[_max_gc_threads];
+  _par_last_update_rs_times_ms = new double[_max_gc_threads];
+  _par_last_update_rs_processed_buffers = new double[_max_gc_threads];
+  _par_last_scan_rs_times_ms = new double[_max_gc_threads];
+  _par_last_obj_copy_times_ms = new double[_max_gc_threads];
+  _par_last_termination_times_ms = new double[_max_gc_threads];
+  _par_last_termination_attempts = new double[_max_gc_threads];
+  _par_last_gc_worker_end_times_ms = new double[_max_gc_threads];
+  _par_last_gc_worker_times_ms = new double[_max_gc_threads];
+  _par_last_gc_worker_other_times_ms = new double[_max_gc_threads];
+}
+
+void G1GCPhaseTimes::note_gc_start(double pause_start_time_sec, uint active_gc_threads,
+  bool is_young_gc, bool is_initial_mark_gc, GCCause::Cause gc_cause) {
+  assert(active_gc_threads > 0, "The number of threads must be > 0");
+  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads");
+  _active_gc_threads = active_gc_threads;
+  _pause_start_time_sec = pause_start_time_sec;
+  _is_young_gc = is_young_gc;
+  _is_initial_mark_gc = is_initial_mark_gc;
+  _gc_cause = gc_cause;
+
+#ifdef ASSERT
+  // initialise the timing data to something well known so that we can spot
+  // if something is not set properly
+
+  for (uint i = 0; i < _max_gc_threads; ++i) {
+    _par_last_gc_worker_start_times_ms[i] = -1234.0;
+    _par_last_ext_root_scan_times_ms[i] = -1234.0;
+    _par_last_satb_filtering_times_ms[i] = -1234.0;
+    _par_last_update_rs_times_ms[i] = -1234.0;
+    _par_last_update_rs_processed_buffers[i] = -1234.0;
+    _par_last_scan_rs_times_ms[i] = -1234.0;
+    _par_last_obj_copy_times_ms[i] = -1234.0;
+    _par_last_termination_times_ms[i] = -1234.0;
+    _par_last_termination_attempts[i] = -1234.0;
+    _par_last_gc_worker_end_times_ms[i] = -1234.0;
+    _par_last_gc_worker_times_ms[i] = -1234.0;
+    _par_last_gc_worker_other_times_ms[i] = -1234.0;
+  }
+#endif
+}
+
+void G1GCPhaseTimes::note_gc_end(double pause_end_time_sec) {
+  if (G1Log::fine()) {
+    double pause_time_ms = (pause_end_time_sec - _pause_start_time_sec) * MILLIUNITS;
+
+    for (uint i = 0; i < _active_gc_threads; i++) {
+      _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] -
+        _par_last_gc_worker_start_times_ms[i];
+
+      double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
+        _par_last_satb_filtering_times_ms[i] +
+        _par_last_update_rs_times_ms[i] +
+        _par_last_scan_rs_times_ms[i] +
+        _par_last_obj_copy_times_ms[i] +
+        _par_last_termination_times_ms[i];
+
+      _par_last_gc_worker_other_times_ms[i] = _par_last_gc_worker_times_ms[i] -
+        worker_known_time;
+    }
+
+    print(pause_time_ms);
+  }
+
+}
+
+void G1GCPhaseTimes::print_par_stats(int level,
+                                        const char* str,
+                                        double* data,
+                                        bool showDecimals) {
+  double min = data[0], max = data[0];
+  double total = 0.0;
+  LineBuffer buf(level);
+  buf.append("[%s (ms):", str);
+  for (uint i = 0; i < _active_gc_threads; ++i) {
+    double val = data[i];
+    if (val < min)
+      min = val;
+    if (val > max)
+      max = val;
+    total += val;
+    if (G1Log::finest()) {
+      if (showDecimals) {
+        buf.append("  %.1lf", val);
+      } else {
+        buf.append("  %d", (int)val);
+      }
+    }
+  }
+
+  if (G1Log::finest()) {
+    buf.append_and_print_cr("");
+  }
+  double avg = total / (double) _active_gc_threads;
+  if (showDecimals) {
+    buf.append_and_print_cr(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf, Sum: %.1lf]",
+      min, avg, max, max - min, total);
+  } else {
+    buf.append_and_print_cr(" Min: %d, Avg: %d, Max: %d, Diff: %d, Sum: %d]",
+      (int)min, (int)avg, (int)max, (int)max - (int)min, (int)total);
+  }
+}
+
+void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
+}
+
+void G1GCPhaseTimes::print_stats(int level, const char* str, double value, int workers) {
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %d]", str, value, workers);
+}
+
+void G1GCPhaseTimes::print_stats(int level, const char* str, int value) {
+  LineBuffer(level).append_and_print_cr("[%s: %d]", str, value);
+}
+
+double G1GCPhaseTimes::avg_value(double* data) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    double ret = 0.0;
+    for (uint i = 0; i < _active_gc_threads; ++i) {
+      ret += data[i];
+    }
+    return ret / (double) _active_gc_threads;
+  } else {
+    return data[0];
+  }
+}
+
+double G1GCPhaseTimes::max_value(double* data) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    double ret = data[0];
+    for (uint i = 1; i < _active_gc_threads; ++i) {
+      if (data[i] > ret) {
+        ret = data[i];
+      }
+    }
+    return ret;
+  } else {
+    return data[0];
+  }
+}
+
+double G1GCPhaseTimes::sum_of_values(double* data) {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    double sum = 0.0;
+    for (uint i = 0; i < _active_gc_threads; i++) {
+      sum += data[i];
+    }
+    return sum;
+  } else {
+    return data[0];
+  }
+}
+
+double G1GCPhaseTimes::max_sum(double* data1, double* data2) {
+  double ret = data1[0] + data2[0];
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    for (uint i = 1; i < _active_gc_threads; ++i) {
+      double data = data1[i] + data2[i];
+      if (data > ret) {
+        ret = data;
+      }
+    }
+  }
+  return ret;
+}
+
+void G1GCPhaseTimes::collapse_par_times() {
+    _ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
+    _satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
+    _update_rs_time = avg_value(_par_last_update_rs_times_ms);
+    _update_rs_processed_buffers =
+      sum_of_values(_par_last_update_rs_processed_buffers);
+    _scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
+    _obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
+    _termination_time = avg_value(_par_last_termination_times_ms);
+}
+
+double G1GCPhaseTimes::accounted_time_ms() {
+    // Subtract the root region scanning wait time. It's initialized to
+    // zero at the start of the pause.
+    double misc_time_ms = _root_region_scan_wait_time_ms;
+
+    misc_time_ms += _cur_collection_par_time_ms;
+
+    // Now subtract the time taken to fix up roots in generated code
+    misc_time_ms += _cur_collection_code_root_fixup_time_ms;
+
+    // Subtract the time taken to clean the card table from the
+    // current value of "other time"
+    misc_time_ms += _cur_clear_ct_time_ms;
+
+    return misc_time_ms;
+}
+
+void G1GCPhaseTimes::print(double pause_time_ms) {
+
+  if (PrintGCTimeStamps) {
+    gclog_or_tty->stamp();
+    gclog_or_tty->print(": ");
+  }
+
+  GCCauseString gc_cause_str = GCCauseString("GC pause", _gc_cause)
+    .append(_is_young_gc ? " (young)" : " (mixed)")
+    .append(_is_initial_mark_gc ? " (initial-mark)" : "");
+  gclog_or_tty->print_cr("[%s, %3.7f secs]", (const char*)gc_cause_str, pause_time_ms / 1000.0);
+
+  if (!G1Log::finer()) {
+    return;
+  }
+
+  if (_root_region_scan_wait_time_ms > 0.0) {
+    print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
+  }
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
+    print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
+    print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
+    if (_satb_filtering_time > 0.0) {
+      print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
+    }
+    print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
+    if (G1Log::finest()) {
+      print_par_stats(3, "Processed Buffers", _par_last_update_rs_processed_buffers,
+        false /* showDecimals */);
+    }
+    print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
+    print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
+    print_par_stats(2, "Termination", _par_last_termination_times_ms);
+    if (G1Log::finest()) {
+      print_par_stats(3, "Termination Attempts", _par_last_termination_attempts,
+        false /* showDecimals */);
+    }
+    print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
+    print_par_stats(2, "GC Worker Total", _par_last_gc_worker_times_ms);
+    print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
+  } else {
+    print_stats(1, "Ext Root Scanning", _ext_root_scan_time);
+    if (_satb_filtering_time > 0.0) {
+      print_stats(1, "SATB Filtering", _satb_filtering_time);
+    }
+    print_stats(1, "Update RS", _update_rs_time);
+    if (G1Log::finest()) {
+      print_stats(2, "Processed Buffers", (int)_update_rs_processed_buffers);
+    }
+    print_stats(1, "Scan RS", _scan_rs_time);
+    print_stats(1, "Object Copying", _obj_copy_time);
+  }
+  print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
+  print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
+  if (Verbose && G1Log::finest()) {
+    print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
+    print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
+    print_stats(1, "Min Clear CC", _min_clear_cc_time_ms);
+    print_stats(1, "Max Clear CC", _max_clear_cc_time_ms);
+    if (_num_cc_clears > 0) {
+      print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears));
+    }
+  }
+  double misc_time_ms = pause_time_ms - accounted_time_ms();
+  print_stats(1, "Other", misc_time_ms);
+  print_stats(2, "Choose CSet",
+    (_recorded_young_cset_choice_time_ms +
+    _recorded_non_young_cset_choice_time_ms));
+  print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
+  print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
+  print_stats(2, "Free CSet",
+    (_recorded_young_free_cset_time_ms +
+    _recorded_non_young_free_cset_time_ms));
+}
+
+void G1GCPhaseTimes::record_cc_clear_time_ms(double ms) {
+  if (!(Verbose && G1Log::finest())) {
+    return;
+  }
+
+  if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms) {
+    _min_clear_cc_time_ms = ms;
+  }
+  if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms) {
+    _max_clear_cc_time_ms = ms;
+  }
+  _cur_clear_cc_time_ms = ms;
+  _cum_clear_cc_time_ms += ms;
+  _num_cc_clears++;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
+
+#include "memory/allocation.hpp"
+#include "gc_interface/gcCause.hpp"
+
+class G1GCPhaseTimes : public CHeapObj<mtGC> {
+  friend class G1CollectorPolicy;
+  friend class TraceGen0TimeData;
+
+ private:
+  uint _active_gc_threads;
+  uint _max_gc_threads;
+
+  GCCause::Cause _gc_cause;
+  bool           _is_young_gc;
+  bool           _is_initial_mark_gc;
+
+  double _pause_start_time_sec;
+
+  double* _par_last_gc_worker_start_times_ms;
+  double* _par_last_ext_root_scan_times_ms;
+  double* _par_last_satb_filtering_times_ms;
+  double* _par_last_update_rs_times_ms;
+  double* _par_last_update_rs_processed_buffers;
+  double* _par_last_scan_rs_times_ms;
+  double* _par_last_obj_copy_times_ms;
+  double* _par_last_termination_times_ms;
+  double* _par_last_termination_attempts;
+  double* _par_last_gc_worker_end_times_ms;
+  double* _par_last_gc_worker_times_ms;
+  double* _par_last_gc_worker_other_times_ms;
+
+  double _cur_collection_par_time_ms;
+
+  double _cur_collection_code_root_fixup_time_ms;
+
+  double _cur_clear_ct_time_ms;
+  double _cur_ref_proc_time_ms;
+  double _cur_ref_enq_time_ms;
+
+  // Helper methods for detailed logging
+  void print_par_stats(int level, const char* str, double* data, bool showDecimals = true);
+  void print_stats(int level, const char* str, double value);
+  void print_stats(int level, const char* str, double value, int workers);
+  void print_stats(int level, const char* str, int value);
+  double avg_value(double* data);
+  double max_value(double* data);
+  double sum_of_values(double* data);
+  double max_sum(double* data1, double* data2);
+  double accounted_time_ms();
+
+  // Card Table Count Cache stats
+  double _min_clear_cc_time_ms;         // min
+  double _max_clear_cc_time_ms;         // max
+  double _cur_clear_cc_time_ms;         // clearing time during current pause
+  double _cum_clear_cc_time_ms;         // cummulative clearing time
+  jlong  _num_cc_clears;                // number of times the card count cache has been cleared
+
+  // The following insance variables are directly accessed by G1CollectorPolicy
+  // and TraceGen0TimeData. This is why those classes are declared friends.
+  // An alternative is to add getters and setters for all of these fields.
+  // It might also be possible to restructure the code to reduce these
+  // dependencies.
+  double _ext_root_scan_time;
+  double _satb_filtering_time;
+  double _update_rs_time;
+  double _update_rs_processed_buffers;
+  double _scan_rs_time;
+  double _obj_copy_time;
+  double _termination_time;
+
+  double _cur_collection_start_sec;
+  double _root_region_scan_wait_time_ms;
+
+  double _recorded_young_cset_choice_time_ms;
+  double _recorded_non_young_cset_choice_time_ms;
+
+  double _recorded_young_free_cset_time_ms;
+  double _recorded_non_young_free_cset_time_ms;
+
+  void print(double pause_time_ms);
+
+ public:
+  G1GCPhaseTimes(uint max_gc_threads);
+  void note_gc_start(double pause_start_time_sec, uint active_gc_threads,
+    bool is_young_gc, bool is_initial_mark_gc, GCCause::Cause gc_cause);
+  void note_gc_end(double pause_end_time_sec);
+  void collapse_par_times();
+
+  void record_gc_worker_start_time(uint worker_i, double ms) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_gc_worker_start_times_ms[worker_i] = ms;
+  }
+
+  void record_ext_root_scan_time(uint worker_i, double ms) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_ext_root_scan_times_ms[worker_i] = ms;
+  }
+
+  void record_satb_filtering_time(uint worker_i, double ms) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_satb_filtering_times_ms[worker_i] = ms;
+  }
+
+  void record_update_rs_time(uint worker_i, double ms) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_update_rs_times_ms[worker_i] = ms;
+  }
+
+  void record_update_rs_processed_buffers (uint worker_i,
+                                           double processed_buffers) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_update_rs_processed_buffers[worker_i] = processed_buffers;
+  }
+
+  void record_scan_rs_time(uint worker_i, double ms) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_scan_rs_times_ms[worker_i] = ms;
+  }
+
+  void reset_obj_copy_time(uint worker_i) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_obj_copy_times_ms[worker_i] = 0.0;
+  }
+
+  void reset_obj_copy_time() {
+    reset_obj_copy_time(0);
+  }
+
+  void record_obj_copy_time(uint worker_i, double ms) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_obj_copy_times_ms[worker_i] += ms;
+  }
+
+  void record_termination(uint worker_i, double ms, size_t attempts) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_termination_times_ms[worker_i] = ms;
+    _par_last_termination_attempts[worker_i] = (double) attempts;
+  }
+
+  void record_gc_worker_end_time(uint worker_i, double ms) {
+    assert(worker_i >= 0, "worker index must be > 0");
+    assert(worker_i < _active_gc_threads, "worker index out of bounds");
+    _par_last_gc_worker_end_times_ms[worker_i] = ms;
+  }
+
+  void record_clear_ct_time(double ms) {
+    _cur_clear_ct_time_ms = ms;
+  }
+
+  void record_par_time(double ms) {
+    _cur_collection_par_time_ms = ms;
+  }
+
+  void record_code_root_fixup_time(double ms) {
+    _cur_collection_code_root_fixup_time_ms = ms;
+  }
+
+  void record_ref_proc_time(double ms) {
+    _cur_ref_proc_time_ms = ms;
+  }
+
+  void record_ref_enq_time(double ms) {
+    _cur_ref_enq_time_ms = ms;
+  }
+
+  void record_root_region_scan_wait_time(double time_ms) {
+    _root_region_scan_wait_time_ms = time_ms;
+  }
+
+  void record_cc_clear_time_ms(double ms);
+
+  void record_young_free_cset_time_ms(double time_ms) {
+    _recorded_young_free_cset_time_ms = time_ms;
+  }
+
+  void record_non_young_free_cset_time_ms(double time_ms) {
+    _recorded_non_young_free_cset_time_ms = time_ms;
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -29,6 +29,7 @@
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
@@ -224,7 +225,7 @@
   assert( _cards_scanned != NULL, "invariant" );
   _cards_scanned[worker_i] = scanRScl.cards_done();
 
-  _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
+  _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
 }
 
 // Closure used for updating RSets and recording references that
@@ -276,7 +277,7 @@
     guarantee(cl.n() == 0, "Card table should be clean.");
   }
 
-  _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
+  _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
 }
 
 class CountRSSizeClosure: public HeapRegionClosure {
@@ -390,13 +391,13 @@
   if (G1UseParallelRSetUpdating || (worker_i == 0)) {
     updateRS(&into_cset_dcq, worker_i);
   } else {
-    _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
-    _g1p->record_update_rs_time(worker_i, 0.0);
+    _g1p->phase_times()->record_update_rs_processed_buffers(worker_i, 0.0);
+    _g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
   }
   if (G1UseParallelRSetScanning || (worker_i == 0)) {
     scanRS(oc, worker_i);
   } else {
-    _g1p->record_scan_rs_time(worker_i, 0.0);
+    _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
   }
 
   // We now clear the cached values of _cset_rs_update_cl for this worker
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -546,23 +546,6 @@
     }
   }
 
-  if (is_put && !is_static && klass->is_subclass_of(SystemDictionary::CallSite_klass()) && (info.name() == vmSymbols::target_name())) {
-    const jint direction = frame::interpreter_frame_expression_stack_direction();
-    Handle call_site    (THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at(-1 * direction)));
-    Handle method_handle(THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at( 0 * direction)));
-    assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-    assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
-
-    {
-      // Walk all nmethods depending on this call site.
-      MutexLocker mu(Compile_lock, thread);
-      Universe::flush_dependents_on(call_site, method_handle);
-    }
-
-    // Don't allow fast path for setting CallSite.target and sub-classes.
-    put_code = (Bytecodes::Code) 0;
-  }
-
   cache_entry(thread)->set_field(
     get_code,
     put_code,
--- a/src/share/vm/oops/methodOop.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/oops/methodOop.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -122,8 +122,9 @@
   u2                _max_locals;                 // Number of local variables used by this method
   u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
   u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
-  u1                _jfr_towrite : 1,            // Flags
-                                 : 7;
+  u1                _jfr_towrite  : 1,           // Flags
+                    _force_inline : 1,
+                                  : 6;
   u2                _interpreter_throwout_count; // Count of times method was exited via exception while interpreting
   u2                _number_of_breakpoints;      // fullspeed debugging support
   InvocationCounter _invocation_counter;         // Incremented before each activation of the method - used to trigger frequency-based optimizations
@@ -655,6 +656,9 @@
   bool jfr_towrite()                 { return _jfr_towrite; }
   void set_jfr_towrite(bool towrite) { _jfr_towrite = towrite; }
 
+  bool force_inline()            { return _force_inline; }
+  void set_force_inline(bool fi) { _force_inline = fi; }
+
   // On-stack replacement support
   bool has_osr_nmethod(int level, bool match_level) {
    return instanceKlass::cast(method_holder())->lookup_osr_nmethod(this, InvocationEntryBci, level, match_level) != NULL;
--- a/src/share/vm/opto/stringopts.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/opto/stringopts.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -533,7 +533,17 @@
         if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) {
           CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
           if (csj->method() != NULL &&
-              csj->method()->intrinsic_id() == vmIntrinsics::_Integer_toString) {
+              csj->method()->intrinsic_id() == vmIntrinsics::_Integer_toString &&
+              arg->outcnt() == 1) {
+            // _control is the list of StringBuilder calls nodes which
+            // will be replaced by new String code after this optimization.
+            // Integer::toString() call is not part of StringBuilder calls
+            // chain. It could be eliminated only if its result is used
+            // only by this SB calls chain.
+            // Another limitation: it should be used only once because
+            // it is unknown that it is used only by this SB calls chain
+            // until all related SB calls nodes are collected.
+            assert(arg->unique_out() == cnode, "sanity");
             sc->add_control(csj);
             sc->push_int(csj->in(TypeFunc::Parms));
             continue;
--- a/src/share/vm/prims/methodHandles.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/prims/methodHandles.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -3180,17 +3180,15 @@
       jclass MH_class = env->FindClass(MH_name);
       status = env->RegisterNatives(MH_class, invoke_methods, sizeof(invoke_methods)/sizeof(JNINativeMethod));
     }
+    if (!env->ExceptionOccurred()) {
+      status = env->RegisterNatives(MHN_class, call_site_methods, sizeof(call_site_methods)/sizeof(JNINativeMethod));
+    }
     if (env->ExceptionOccurred()) {
       warning("JSR 292 method handle code is mismatched to this JVM.  Disabling support.");
       enable_MH = false;
       env->ExceptionClear();
     }
 
-    status = env->RegisterNatives(MHN_class, call_site_methods, sizeof(call_site_methods)/sizeof(JNINativeMethod));
-    if (env->ExceptionOccurred()) {
-      // Exception is okay until 7087357
-      env->ExceptionClear();
-    }
   }
 
   if (enable_MH) {
--- a/src/share/vm/prims/unsafe.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/prims/unsafe.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -178,17 +178,6 @@
     v = *(oop*)index_oop_from_field_offset_long(p, offset);                 \
   }
 
-#define GET_OOP_FIELD_VOLATILE(obj, offset, v) \
-  oop p = JNIHandles::resolve(obj);   \
-  volatile oop v;                     \
-  if (UseCompressedOops) {            \
-    volatile narrowOop n = *(volatile narrowOop*)index_oop_from_field_offset_long(p, offset); \
-    v = oopDesc::decode_heap_oop(n);                               \
-  } else {                            \
-    v = *(volatile oop*)index_oop_from_field_offset_long(p, offset);       \
-  } \
-  OrderAccess::acquire();
-
 
 // Get/SetObject must be special-cased, since it works with handles.
 
@@ -296,28 +285,21 @@
 
 UNSAFE_ENTRY(jobject, Unsafe_GetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
   UnsafeWrapper("Unsafe_GetObjectVolatile");
-  GET_OOP_FIELD_VOLATILE(obj, offset, v)
+  oop p = JNIHandles::resolve(obj);
+  void* addr = index_oop_from_field_offset_long(p, offset);
+  volatile oop v;
+  if (UseCompressedOops) {
+    volatile narrowOop n = *(volatile narrowOop*) addr;
+    v = oopDesc::decode_heap_oop(n);
+  } else {
+    v = *(volatile oop*) addr;
+  }
+  OrderAccess::acquire();
   return JNIHandles::make_local(env, v);
 UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_SetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
   UnsafeWrapper("Unsafe_SetObjectVolatile");
-  {
-    // Catch VolatileCallSite.target stores (via
-    // CallSite.setTargetVolatile) and check call site dependencies.
-    oop p = JNIHandles::resolve(obj);
-    if ((offset == java_lang_invoke_CallSite::target_offset_in_bytes()) && p->is_a(SystemDictionary::CallSite_klass())) {
-      Handle call_site    (THREAD, p);
-      Handle method_handle(THREAD, JNIHandles::resolve(x_h));
-      assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-      assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
-      {
-        // Walk all nmethods depending on this call site.
-        MutexLocker mu(Compile_lock, thread);
-        Universe::flush_dependents_on(call_site(), method_handle());
-      }
-    }
-  }
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
   void* addr = index_oop_from_field_offset_long(p, offset);
--- a/src/share/vm/services/memRecorder.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/services/memRecorder.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -45,11 +45,11 @@
 }
 
 
-debug_only(volatile jint MemRecorder::_instance_count = 0;)
+volatile jint MemRecorder::_instance_count = 0;
 
 MemRecorder::MemRecorder() {
   assert(MemTracker::is_on(), "Native memory tracking is off");
-  debug_only(Atomic::inc(&_instance_count);)
+  Atomic::inc(&_instance_count);
   debug_only(set_generation();)
 
   if (MemTracker::track_callsite()) {
@@ -83,9 +83,7 @@
     delete _next;
   }
 
-#ifdef ASSERT
   Atomic::dec(&_instance_count);
-#endif
 }
 
 // Sorting order:
--- a/src/share/vm/services/memRecorder.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/services/memRecorder.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -249,9 +249,9 @@
 
   SequencedRecordIterator pointer_itr();
 
- public:
+ protected:
   // number of MemRecorder instance
-  debug_only(static volatile jint _instance_count;)
+  static volatile jint _instance_count;
 
  private:
   // sorting function, sort records into following order
--- a/src/share/vm/services/memSnapshot.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/services/memSnapshot.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -173,7 +173,7 @@
     _staging_area = new (std::nothrow)MemPointerArrayImpl<SeqMemPointerRecord>();
   }
 
-  _lock = new (std::nothrow) Mutex(Monitor::native, "memSnapshotLock");
+  _lock = new (std::nothrow) Mutex(Monitor::max_nonleaf - 1, "memSnapshotLock");
   NOT_PRODUCT(_untracked_count = 0;)
 }
 
--- a/src/share/vm/services/memTrackWorker.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/services/memTrackWorker.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -67,7 +67,7 @@
   NOT_PRODUCT(int _last_gen_in_use;)
 
   inline int generations_in_use() const {
-    return (_tail <= _head ? (_head - _tail + 1) : (MAX_GENERATIONS - (_tail - _head) + 1));
+    return (_tail >= _head ? (_tail - _head + 1) : (MAX_GENERATIONS - (_head - _tail) + 1));
   }
 };
 
--- a/src/share/vm/services/memTracker.cpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/services/memTracker.cpp	Tue Jul 24 13:16:26 2012 -0400
@@ -54,7 +54,7 @@
 MemRecorder*                    MemTracker::_global_recorder = NULL;
 MemSnapshot*                    MemTracker::_snapshot = NULL;
 MemBaseline                     MemTracker::_baseline;
-Mutex                           MemTracker::_query_lock(Monitor::native, "NMT_queryLock");
+Mutex*                          MemTracker::_query_lock = NULL;
 volatile MemRecorder*           MemTracker::_merge_pending_queue = NULL;
 volatile MemRecorder*           MemTracker::_pooled_recorders = NULL;
 MemTrackWorker*                 MemTracker::_worker_thread = NULL;
@@ -89,6 +89,12 @@
       return;
     }
 
+    _query_lock = new (std::nothrow) Mutex(Monitor::max_nonleaf, "NMT_queryLock");
+    if (_query_lock == NULL) {
+      shutdown(NMT_out_of_memory);
+      return;
+    }
+
     debug_only(_main_thread_tid = os::current_thread_id();)
     _state = NMT_bootstrapping_single_thread;
     NMT_track_callsite = (_tracking_level == NMT_detail && can_walk_stack());
@@ -164,7 +170,7 @@
   {
     // shared baseline and snapshot are the only objects needed to
     // create query results
-    MutexLockerEx locker(&_query_lock, true);
+    MutexLockerEx locker(_query_lock, true);
     // cleanup baseline data and snapshot
     _baseline.clear();
     delete _snapshot;
@@ -351,21 +357,17 @@
     }
 
     if (thread != NULL) {
-#ifdef ASSERT
-      // cause assertion on stack base. This ensures that threads call
-      // Thread::record_stack_base_and_size() method, which will create
-      // thread native stack records.
-      thread->stack_base();
-#endif
-      // for a JavaThread, if it is running in native state, we need to transition it to
-      // VM state, so it can stop at safepoint. JavaThread running in VM state does not
-      // need lock to write records.
       if (thread->is_Java_thread() && ((JavaThread*)thread)->is_safepoint_visible()) {
-        if (((JavaThread*)thread)->thread_state() == _thread_in_native) {
-          ThreadInVMfromNative trans((JavaThread*)thread);
-          create_record_in_recorder(addr, flags, size, pc, thread);
+        JavaThread*      java_thread = static_cast<JavaThread*>(thread);
+        JavaThreadState  state = java_thread->thread_state();
+        if (SafepointSynchronize::safepoint_safe(java_thread, state)) {
+          // JavaThreads that are safepoint safe, can run through safepoint,
+          // so ThreadCritical is needed to ensure no threads at safepoint create
+          // new records while the records are being gathered and the sequence number is changing
+          ThreadCritical tc;
+          create_record_in_recorder(addr, flags, size, pc, java_thread);
         } else {
-          create_record_in_recorder(addr, flags, size, pc, thread);
+          create_record_in_recorder(addr, flags, size, pc, java_thread);
         }
       } else {
         // other threads, such as worker and watcher threads, etc. need to
@@ -390,10 +392,9 @@
 // write a record to proper recorder. No lock can be taken from this method
 // down.
 void MemTracker::create_record_in_recorder(address addr, MEMFLAGS flags,
-    size_t size, address pc, Thread* thread) {
-    assert(thread == NULL || thread->is_Java_thread(), "wrong thread");
+    size_t size, address pc, JavaThread* thread) {
 
-    MemRecorder* rc = get_thread_recorder((JavaThread*)thread);
+    MemRecorder* rc = get_thread_recorder(thread);
     if (rc != NULL) {
       rc->record(addr, flags, size, pc);
     }
@@ -460,17 +461,18 @@
       }
     }
     _sync_point_skip_count = 0;
-    // walk all JavaThreads to collect recorders
-    SyncThreadRecorderClosure stc;
-    Threads::threads_do(&stc);
-
-    _thread_count = stc.get_thread_count();
-    MemRecorder* pending_recorders = get_pending_recorders();
-
     {
       // This method is running at safepoint, with ThreadCritical lock,
       // it should guarantee that NMT is fully sync-ed.
       ThreadCritical tc;
+
+      // walk all JavaThreads to collect recorders
+      SyncThreadRecorderClosure stc;
+      Threads::threads_do(&stc);
+
+      _thread_count = stc.get_thread_count();
+      MemRecorder* pending_recorders = get_pending_recorders();
+
       if (_global_recorder != NULL) {
         _global_recorder->set_next(pending_recorders);
         pending_recorders = _global_recorder;
@@ -486,8 +488,6 @@
 
   // now, it is the time to shut whole things off
   if (_state == NMT_final_shutdown) {
-    _tracking_level = NMT_off;
-
     // walk all JavaThreads to delete all recorders
     SyncThreadRecorderClosure stc;
     Threads::threads_do(&stc);
@@ -499,8 +499,16 @@
         _global_recorder = NULL;
       }
     }
-
-    _state = NMT_shutdown;
+    MemRecorder* pending_recorders = get_pending_recorders();
+    if (pending_recorders != NULL) {
+      delete pending_recorders;
+    }
+    // try at a later sync point to ensure MemRecorder instance drops to zero to
+    // completely shutdown NMT
+    if (MemRecorder::_instance_count == 0) {
+      _state = NMT_shutdown;
+      _tracking_level = NMT_off;
+    }
   }
 }
 
@@ -534,7 +542,7 @@
 
 // baseline current memory snapshot
 bool MemTracker::baseline() {
-  MutexLockerEx lock(&_query_lock, true);
+  MutexLockerEx lock(_query_lock, true);
   MemSnapshot* snapshot = get_snapshot();
   if (snapshot != NULL) {
     return _baseline.baseline(*snapshot, false);
@@ -545,7 +553,7 @@
 // print memory usage from current snapshot
 bool MemTracker::print_memory_usage(BaselineOutputer& out, size_t unit, bool summary_only) {
   MemBaseline  baseline;
-  MutexLockerEx lock(&_query_lock, true);
+  MutexLockerEx lock(_query_lock, true);
   MemSnapshot* snapshot = get_snapshot();
   if (snapshot != NULL && baseline.baseline(*snapshot, summary_only)) {
     BaselineReporter reporter(out, unit);
@@ -557,7 +565,7 @@
 
 // compare memory usage between current snapshot and baseline
 bool MemTracker::compare_memory_usage(BaselineOutputer& out, size_t unit, bool summary_only) {
-  MutexLockerEx lock(&_query_lock, true);
+  MutexLockerEx lock(_query_lock, true);
   if (_baseline.baselined()) {
     MemBaseline baseline;
     MemSnapshot* snapshot = get_snapshot();
--- a/src/share/vm/services/memTracker.hpp	Mon Jul 16 15:31:18 2012 -0400
+++ b/src/share/vm/services/memTracker.hpp	Tue Jul 24 13:16:26 2012 -0400
@@ -126,6 +126,8 @@
         return "Native memory tracking has been shutdown by user";
       case NMT_normal:
         return "Native memory tracking has been shutdown due to process exiting";
+      case NMT_out_of_memory:
+        return "Native memory tracking has been shutdown due to out of native memory";
       case NMT_initialization:
         return "Native memory tracking failed to initialize";
       case NMT_error_reporting:
@@ -326,7 +328,7 @@
   static void create_memory_record(address addr, MEMFLAGS type,
                    size_t size, address pc, Thread* thread);
   static void create_record_in_recorder(address addr, MEMFLAGS type,
-                   size_t size, address pc, Thread* thread);
+                   size_t size, address pc, JavaThread* thread);
 
  private:
   // global memory snapshot
@@ -336,7 +338,7 @@
   static MemBaseline      _baseline;
 
   // query lock
-  static Mutex            _query_lock;
+  static Mutex*           _query_lock;
 
   // a thread can start to allocate memory before it is attached
   // to VM 'Thread', those memory activities are recorded here.
--- a/test/runtime/6294277/SourceDebugExtension.java	Mon Jul 16 15:31:18 2012 -0400
+++ b/test/runtime/6294277/SourceDebugExtension.java	Tue Jul 24 13:16:26 2012 -0400
@@ -25,6 +25,7 @@
  * @test
  * @bug 6294277
  * @summary java -Xdebug crashes on SourceDebugExtension attribute larger than 64K
+ * @run main/othervm -Xdebug -Xrunjdwp:transport=dt_socket,address=8888,server=y,suspend=n SourceDebugExtension
  */
 import java.io.*;
 
--- a/test/runtime/6294277/Test6294277.sh	Mon Jul 16 15:31:18 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-# 
-#  Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
-#  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-# 
-#  This code is free software; you can redistribute it and/or modify it
-#  under the terms of the GNU General Public License version 2 only, as
-#  published by the Free Software Foundation.
-# 
-#  This code is distributed in the hope that it will be useful, but WITHOUT
-#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-#  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  version 2 for more details (a copy is included in the LICENSE file that
-#  accompanied this code).
-# 
-#  You should have received a copy of the GNU General Public License version
-#  2 along with this work; if not, write to the Free Software Foundation,
-#  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-# 
-#  Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-#  or visit www.oracle.com if you need additional information or have any
-#  questions.
-# 
-
- 
-# @test Test6294277.sh
-# @bug 6294277
-# @summary java -Xdebug crashes on SourceDebugExtension attribute larger than 64K
-# @run shell Test6294277.sh
-#
-
-
-if [ "${TESTSRC}" = "" ]
-then TESTSRC=.
-fi
-
-if [ "${TESTJAVA}" = "" ]
-then
-  PARENT=`dirname \`which java\``
-  TESTJAVA=`dirname ${PARENT}`
-  echo "TESTJAVA not set, selecting " ${TESTJAVA}
-  echo "If this is incorrect, try setting the variable manually."
-fi
-
-BIT_FLAG=""
-
-# set platform-dependent variables
-OS=`uname -s`
-case "$OS" in
-  SunOS | Linux )
-    NULL=/dev/null
-    PS=":"
-    FS="/"
-    ## for solaris, linux it's HOME
-    FILE_LOCATION=$HOME
-    if [ -f ${FILE_LOCATION}${FS}JDK64BIT -a ${OS} = "SunOS" -a `uname -p`='sparc' ]
-    then
-        BIT_FLAG="-d64"
-    fi
-    ;;
-  Windows_* | Darwin )
-    NULL=NUL
-    PS=";"
-    FS="\\"
-    echo "Test skipped"
-    exit 0
-    ;;
-  * )
-    echo "Unrecognized system!"
-    exit 1;
-    ;;
-esac
-
-cp ${TESTSRC}${FS}*.java .
-
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -fullversion
-
-${TESTJAVA}${FS}bin${FS}javac *.java
-
-${TESTJAVA}${FS}bin${FS}java ${BIT_FLAG} -classpath . -Xdebug -Xrunjdwp:transport=dt_socket,address=8888,server=y,suspend=n SourceDebugExtension > test.out 2>&1 &
-
-P_PID=$!
-
-sleep 60
-STATUS=1
-
-grep "Test PASSES" test.out > ${NULL}
-if [ $? = 0 ]; then
-    cat test.out
-    STATUS=0
-fi
-
-exit $STATUS
--- a/test/runtime/7020373/Test7020373.sh	Mon Jul 16 15:31:18 2012 -0400
+++ b/test/runtime/7020373/Test7020373.sh	Tue Jul 24 13:16:26 2012 -0400
@@ -2,10 +2,9 @@
 
 ##
 ## @test
-## @bug 7020373 7055247
+## @bug 7020373 7055247 7053586
 ## @key cte_test
 ## @summary JSR rewriting can overflow memory address size variables
-## @ignore Ignore it until 7053586 fixed
 ## @run shell Test7020373.sh
 ##
 
@@ -30,7 +29,7 @@
 # set platform-dependent variables
 OS=`uname -s`
 case "$OS" in
-  SunOS | Linux )
+  SunOS | Linux | Darwin )
     NULL=/dev/null
     PS=":"
     FS="/"
@@ -66,7 +65,7 @@
     echo "Test Failed"
     exit 1
 else
-    grep "java.lang.LinkageError" test.out
+    egrep "java.lang.LinkageError|java.lang.NoSuchMethodError|Main method not found in class OOMCrashClass4000_1|insufficient memory" test.out
     if [ $? = 0 ]
     then
         echo "Test Passed"