changeset 3579:bc3e01899804

Merge
author kvn
date Thu, 19 Jul 2012 16:17:41 -0700
parents 7553d441b878 (current diff) dd785aabe02b (diff)
children 12fc2571a6e2
files
diffstat 16 files changed, 737 insertions(+), 418 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/assembler_x86.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -2573,6 +2573,13 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {  // "Interleave Low Quadwords", register-register form
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));  // the SSE2 check is wrapped in NOT_LP64
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);  // dst is passed as both the destination and the first source
+  emit_byte(0x6C);  // byte emitted right after the prefix; presumably the opcode byte
+  emit_byte(0xC0 | encode);  // 0xC0 OR'ed with the value returned by the prefix helper (looks like a register-direct ModRM byte; confirm)
+}
+
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
@@ -3178,6 +3185,13 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {  // register-register form; vector256 is forwarded to the VEX prefix helper
+  assert(VM_Version::supports_avx2() || (!vector256) && VM_Version::supports_avx(), "");  // && binds tighter than ||: passes with AVX2, or with AVX when vector256 is false
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);  // emits the VEX prefix and returns the register encoding
+  emit_byte(0xEF);  // byte emitted right after the prefix; presumably the opcode byte
+  emit_byte(0xC0 | encode);  // 0xC0 OR'ed with the returned encoding (looks like a register-direct ModRM byte; confirm)
+}
+
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
   bool vector256 = true;
@@ -3189,6 +3203,17 @@
   emit_byte(0x01);
 }
 
+void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {  // register form with a fixed immediate of 0x01
+  assert(VM_Version::supports_avx2(), "");  // asserts AVX2, whereas vinsertf128h asserts only AVX
+  bool vector256 = true;  // always passed as true to the prefix helper
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);  // selects the VEX_OPCODE_0F_3A opcode map
+  emit_byte(0x38);  // byte emitted right after the prefix; presumably the opcode byte
+  emit_byte(0xC0 | encode);  // 0xC0 OR'ed with the returned encoding (looks like a register-direct ModRM byte; confirm)
+  // 0x00 - insert into lower 128 bits
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);  // immediate byte: always the "upper 128 bits" selector listed above
+}
+
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
@@ -7480,6 +7505,24 @@
   movb(as_Address(dst), src);
 }
 
+void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {  // movdl into dst from the location named by an AddressLiteral
+  if (reachable(src)) {  // literal can be turned into an Address operand directly
+    movdl(dst, as_Address(src));
+  } else {  // not reachable: go through rscratch1 instead
+    lea(rscratch1, src);  // overwrites rscratch1 with the literal's address
+    movdl(dst, Address(rscratch1, 0));  // load via rscratch1 with displacement 0
+  }
+}
+
+void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {  // movq into dst from the location named by an AddressLiteral
+  if (reachable(src)) {  // literal can be turned into an Address operand directly
+    movq(dst, as_Address(src));
+  } else {  // not reachable: go through rscratch1 instead
+    lea(rscratch1, src);  // overwrites rscratch1 with the literal's address
+    movq(dst, Address(rscratch1, 0));  // load via rscratch1 with displacement 0
+  }
+}
+
 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     if (UseXmmLoadAndClearUpper) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Jul 19 16:17:41 2012 -0700
@@ -1466,6 +1466,9 @@
   void punpckldq(XMMRegister dst, XMMRegister src);
   void punpckldq(XMMRegister dst, Address src);
 
+  // Interleave Low Quadwords
+  void punpcklqdq(XMMRegister dst, XMMRegister src);
+
 #ifndef _LP64 // no 32bit push/pop on amd64
   void pushl(Address src);
 #endif
@@ -1606,13 +1609,11 @@
 
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 
-  // AVX 3-operands instructions (encoded with VEX prefix)
+  // AVX 3-operands scalar instructions (encoded with VEX prefix)
   void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vaddss(XMMRegister dst, XMMRegister nds, Address src);
   void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vdivss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1625,13 +1626,17 @@
   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
 
   // AVX Vector instrucitons.
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
   // to avoid transaction penalty between AVX and SSE states. There is no
@@ -2563,6 +2568,20 @@
   void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
   void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {  // dispatches on UseAVX and vector256
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else  // vector256 requested while UseAVX <= 1: emit vxorpd instead
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
+
+  // Move packed integer values from the low 128 bits to the high 128 bits of a 256-bit vector.
+  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {  // dispatches on UseAVX
+    if (UseAVX > 1) // vinserti128h is available only in AVX2
+      Assembler::vinserti128h(dst, nds, src);
+    else  // UseAVX <= 1: emit vinsertf128h instead
+      Assembler::vinsertf128h(dst, nds, src);
+  }
 
   // Data
 
@@ -2615,6 +2634,13 @@
   // to avoid hiding movb
   void movbyte(ArrayAddress dst, int src);
 
+  // Import the other movdl()/movq() overloads from the parent class or else
+  // they will be hidden by the declarations that follow.
+  using Assembler::movdl;
+  using Assembler::movq;
+  void movdl(XMMRegister dst, AddressLiteral src);
+  void movq(XMMRegister dst, AddressLiteral src);
+
   // Can push value or effective address
   void pushptr(AddressLiteral src);
 
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -562,7 +562,7 @@
         AllocatePrefetchInstr = 3;
       }
       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
-      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+      if( supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
         UseXMMForArrayCopy = true;
       }
       if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
--- a/src/cpu/x86/vm/x86.ad	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/cpu/x86/vm/x86.ad	Thu Jul 19 16:17:41 2012 -0700
@@ -71,244 +71,244 @@
 //              XMM0-XMM3 might hold parameters
 
 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
-reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
-reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
-reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
-reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
-reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
+reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
+reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
+reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
+reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
+reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
+reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
 
 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
-reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
-reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
-reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
-reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
-reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
+reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
+reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
+reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
+reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
+reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
+reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
 
 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
-reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
-reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
-reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
-reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
-reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
+reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
+reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
+reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
+reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
+reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
+reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
 
 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
-reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
-reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
-reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
-reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
-reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
+reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
+reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
+reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
+reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
+reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
+reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
 
 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
-reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
-reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
-reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
-reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
-reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
+reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
+reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
+reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
+reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
+reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
+reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
 
 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
-reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
-reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
-reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
-reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
-reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
+reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
+reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
+reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
+reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
+reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
+reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
 
 #ifdef _WIN64
 
 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
-reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
-reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
-reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
-reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
+reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
+reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
+reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
+reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
+reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
+reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 
 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
-reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
-reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
-reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
-reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
+reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
+reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
+reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
+reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
+reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
+reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 
 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
-reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
-reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
-reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
-reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
-reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
+reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
+reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
+reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
+reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
+reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
+reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 
 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
-reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
-reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
-reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
-reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
-reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
+reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
+reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
+reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
+reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
+reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
+reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 
 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
-reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
-reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
-reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
-reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
+reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
+reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
+reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
+reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
+reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
+reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 
 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
-reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
-reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
-reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
-reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
+reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
+reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
+reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
+reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
+reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
+reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 
 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
-reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
-reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
-reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
-reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
+reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
+reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
+reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
+reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
+reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
+reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 
 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
-reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
-reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
-reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
-reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
+reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
+reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
+reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
+reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
+reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
+reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 
 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
-reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
-reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
-reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
-reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
+reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
+reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
+reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
+reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
+reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
+reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 
 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
-reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
-reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
-reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
-reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
+reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
+reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
+reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
+reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
+reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
+reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 
 #else // _WIN64
 
 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
-reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
-reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
-reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
-reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
+reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
+reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
+reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
+reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
+reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
+reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
 
 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
-reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
-reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
-reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
-reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
+reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
+reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
+reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
+reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
+reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
+reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
 
 #ifdef _LP64
 
 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
-reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
-reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
-reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
-reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
-reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
+reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
+reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
+reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
+reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
+reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
+reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
 
 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
-reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
-reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
-reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
-reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
-reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
+reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
+reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
+reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
+reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
+reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
+reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
 
 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
-reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
-reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
-reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
-reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
+reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
+reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
+reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
+reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
+reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
+reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
 
 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
-reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
-reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
-reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
-reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
+reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
+reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
+reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
+reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
+reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
+reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
 
 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
-reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
-reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
-reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
-reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
+reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
+reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
+reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
+reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
+reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
+reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
 
 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
-reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
-reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
-reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
-reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
+reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
+reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
+reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
+reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
+reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
+reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
 
 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
-reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
-reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
-reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
-reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
+reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
+reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
+reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
+reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
+reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
+reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
 
 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
-reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
-reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
-reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
-reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
-reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
-reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
+reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
+reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
+reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
+reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
+reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
+reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
 
 #endif // _LP64
 
@@ -889,7 +889,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_reg(regF dst, regF src1, regF src2) %{
+instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src1 src2));
 
@@ -901,7 +901,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_mem(regF dst, regF src1, memory src2) %{
+instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src1 (LoadF src2)));
 
@@ -913,7 +913,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddF_imm(regF dst, regF src, immF con) %{
+instruct addF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (AddF src con));
 
@@ -960,7 +960,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_reg(regD dst, regD src1, regD src2) %{
+instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src1 src2));
 
@@ -972,7 +972,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_mem(regD dst, regD src1, memory src2) %{
+instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src1 (LoadD src2)));
 
@@ -984,7 +984,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vaddD_imm(regD dst, regD src, immD con) %{
+instruct addD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (AddD src con));
 
@@ -1031,7 +1031,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_reg(regF dst, regF src1, regF src2) %{
+instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src1 src2));
 
@@ -1043,7 +1043,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_mem(regF dst, regF src1, memory src2) %{
+instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src1 (LoadF src2)));
 
@@ -1055,7 +1055,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubF_imm(regF dst, regF src, immF con) %{
+instruct subF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (SubF src con));
 
@@ -1102,7 +1102,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_reg(regD dst, regD src1, regD src2) %{
+instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src1 src2));
 
@@ -1114,7 +1114,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_mem(regD dst, regD src1, memory src2) %{
+instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src1 (LoadD src2)));
 
@@ -1126,7 +1126,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vsubD_imm(regD dst, regD src, immD con) %{
+instruct subD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (SubD src con));
 
@@ -1173,7 +1173,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_reg(regF dst, regF src1, regF src2) %{
+instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src1 src2));
 
@@ -1185,7 +1185,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_mem(regF dst, regF src1, memory src2) %{
+instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src1 (LoadF src2)));
 
@@ -1197,7 +1197,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulF_imm(regF dst, regF src, immF con) %{
+instruct mulF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (MulF src con));
 
@@ -1244,7 +1244,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_reg(regD dst, regD src1, regD src2) %{
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src1 src2));
 
@@ -1256,7 +1256,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_mem(regD dst, regD src1, memory src2) %{
+instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src1 (LoadD src2)));
 
@@ -1268,7 +1268,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vmulD_imm(regD dst, regD src, immD con) %{
+instruct mulD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (MulD src con));
 
@@ -1315,7 +1315,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_reg(regF dst, regF src1, regF src2) %{
+instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src1 src2));
 
@@ -1327,7 +1327,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_mem(regF dst, regF src1, memory src2) %{
+instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src1 (LoadF src2)));
 
@@ -1339,7 +1339,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivF_imm(regF dst, regF src, immF con) %{
+instruct divF_reg_imm(regF dst, regF src, immF con) %{
   predicate(UseAVX > 0);
   match(Set dst (DivF src con));
 
@@ -1386,7 +1386,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_reg(regD dst, regD src1, regD src2) %{
+instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src1 src2));
 
@@ -1398,7 +1398,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_mem(regD dst, regD src1, memory src2) %{
+instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src1 (LoadD src2)));
 
@@ -1410,7 +1410,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vdivD_imm(regD dst, regD src, immD con) %{
+instruct divD_reg_imm(regD dst, regD src, immD con) %{
   predicate(UseAVX > 0);
   match(Set dst (DivD src con));
 
@@ -1433,7 +1433,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsF_reg(regF dst, regF src) %{
+instruct absF_reg_reg(regF dst, regF src) %{
   predicate(UseAVX > 0);
   match(Set dst (AbsF src));
   ins_cost(150);
@@ -1457,7 +1457,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsD_reg(regD dst, regD src) %{
+instruct absD_reg_reg(regD dst, regD src) %{
   predicate(UseAVX > 0);
   match(Set dst (AbsD src));
   ins_cost(150);
@@ -1481,7 +1481,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegF_reg(regF dst, regF src) %{
+instruct negF_reg_reg(regF dst, regF src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegF src));
   ins_cost(150);
@@ -1505,7 +1505,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegD_reg(regD dst, regD src) %{
+instruct negD_reg_reg(regD dst, regD src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegD src));
   ins_cost(150);
@@ -1719,12 +1719,12 @@
   format %{ "movd    $dst,$src\n\t"
             "punpcklbw $dst,$dst\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\t! replicate16B" %}
+            "punpcklqdq $dst,$dst\t! replicate16B" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1735,14 +1735,14 @@
   format %{ "movd    $dst,$src\n\t"
             "punpcklbw $dst,$dst\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1751,9 +1751,9 @@
 instruct Repl4B_imm(vecS dst, immI con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateB con));
-  format %{ "movss   $dst,[$constantaddress]\t! replicate4B($con)" %}
+  format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
   ins_encode %{
-    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
+    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1761,9 +1761,9 @@
 instruct Repl8B_imm(vecD dst, immI con) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateB con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate8B($con)" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1771,11 +1771,11 @@
 instruct Repl16B_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 16);
   match(Set dst (ReplicateB con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate16B($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1783,13 +1783,13 @@
 instruct Repl32B_imm(vecY dst, immI con) %{
   predicate(n->as_Vector()->length() == 32);
   match(Set dst (ReplicateB con));
-  format %{ "movsd   $dst,[$constantaddress]\t! lreplicate32B($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1828,11 +1828,11 @@
 instruct Repl32B_zero(vecY dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 32);
   match(Set dst (ReplicateB zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate32B zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
   ins_encode %{
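-    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    // 256-bit vpxor needs AVX2 (asserted in Assembler::vpxor). NOTE(review): confirm a vxorpd fallback exists for AVX-only CPUs.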
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -1867,11 +1867,11 @@
   match(Set dst (ReplicateS src));
   format %{ "movd    $dst,$src\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\t! replicate8S" %}
+            "punpcklqdq $dst,$dst\t! replicate8S" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1881,13 +1881,13 @@
   match(Set dst (ReplicateS src));
   format %{ "movd    $dst,$src\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1896,9 +1896,9 @@
 instruct Repl2S_imm(vecS dst, immI con) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateS con));
-  format %{ "movss   $dst,[$constantaddress]\t! replicate2S($con)" %}
+  format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
   ins_encode %{
-    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
+    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -1906,9 +1906,9 @@
 instruct Repl4S_imm(vecD dst, immI con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateS con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate4S($con)" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -1916,11 +1916,11 @@
 instruct Repl8S_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateS con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate8S($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1928,13 +1928,13 @@
 instruct Repl16S_imm(vecY dst, immI con) %{
   predicate(n->as_Vector()->length() == 16);
   match(Set dst (ReplicateS con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate16S($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -1973,11 +1973,11 @@
 instruct Repl16S_zero(vecY dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 16);
   match(Set dst (ReplicateS zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate16S zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
   ins_encode %{
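-    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    // 256-bit vpxor needs AVX2 (asserted in Assembler::vpxor). NOTE(review): confirm a vxorpd fallback exists for AVX-only CPUs.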
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -2012,11 +2012,11 @@
   match(Set dst (ReplicateI src));
   format %{ "movd    $dst,$src\n\t"
             "pshufd  $dst,$dst,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2025,9 +2025,9 @@
 instruct Repl2I_imm(vecD dst, immI con) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateI con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate2I($con)" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -2035,11 +2035,11 @@
 instruct Repl4I_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateI con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate4I($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
+            "punpcklqdq $dst,$dst" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2047,13 +2047,13 @@
 instruct Repl8I_imm(vecY dst, immI con) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateI con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate8I($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2061,7 +2061,7 @@
 // Integer could be loaded into xmm register directly from memory.
 instruct Repl2I_mem(vecD dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateI (LoadVector mem)));
+  match(Set dst (ReplicateI (LoadI mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
   ins_encode %{
@@ -2073,7 +2073,7 @@
 
 instruct Repl4I_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateI (LoadVector mem)));
+  match(Set dst (ReplicateI (LoadI mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
   ins_encode %{
@@ -2085,14 +2085,14 @@
 
 instruct Repl8I_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateI (LoadVector mem)));
+  match(Set dst (ReplicateI (LoadI mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $mem$$Address);
     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2121,11 +2121,11 @@
 instruct Repl8I_zero(vecY dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 8);
   match(Set dst (ReplicateI zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate8I zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
   ins_encode %{
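-    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    // 256-bit vpxor needs AVX2 (asserted in Assembler::vpxor). NOTE(review): confirm a vxorpd fallback exists for AVX-only CPUs.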
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
@@ -2136,10 +2136,10 @@
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateL src));
   format %{ "movdq   $dst,$src\n\t"
-            "movlhps $dst,$dst\t! replicate2L" %}
+            "punpcklqdq $dst,$dst\t! replicate2L" %}
   ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2148,12 +2148,12 @@
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateL src));
   format %{ "movdq   $dst,$src\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2165,12 +2165,12 @@
   format %{ "movdl   $dst,$src.lo\n\t"
             "movdl   $tmp,$src.hi\n\t"
             "punpckldq $dst,$tmp\n\t"
-            "movlhps $dst,$dst\t! replicate2L"%}
+            "punpcklqdq $dst,$dst\t! replicate2L"%}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2182,14 +2182,14 @@
   format %{ "movdl   $dst,$src.lo\n\t"
             "movdl   $tmp,$src.hi\n\t"
             "punpckldq $dst,$tmp\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2199,11 +2199,11 @@
 instruct Repl2L_imm(vecX dst, immL con) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateL con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate2L($con)\n\t"
-            "movlhps $dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress($con));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2211,13 +2211,13 @@
 instruct Repl4L_imm(vecY dst, immL con) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateL con));
-  format %{ "movsd   $dst,[$constantaddress]\t! replicate4L($con)\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst" %}
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
   ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress($con));
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2225,26 +2225,26 @@
 // Long could be loaded into xmm register directly from memory.
 instruct Repl2L_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateL (LoadVector mem)));
+  match(Set dst (ReplicateL (LoadL mem)));
   format %{ "movq    $dst,$mem\n\t"
-            "movlhps $dst,$dst\t! replicate2L" %}
+            "punpcklqdq $dst,$dst\t! replicate2L" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $mem$$Address);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl4L_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL (LoadVector mem)));
+  match(Set dst (ReplicateL (LoadL mem)));
   format %{ "movq    $dst,$mem\n\t"
-            "movlhps $dst,$dst\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $mem$$Address);
-    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -2263,11 +2263,11 @@
 instruct Repl4L_zero(vecY dst, immL0 zero) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateL zero));
-  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4L zero" %}
+  format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
   ins_encode %{
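-    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    // 256-bit vpxor needs AVX2 (asserted in Assembler::vpxor). NOTE(review): confirm a vxorpd fallback exists for AVX-only CPUs.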
     bool vector256 = true;
-    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
   %}
   ins_pipe( fpu_reg_reg );
 %}
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -3505,8 +3505,10 @@
   }
 
   // now perform tests that are based on flag settings
-  if (callee->should_inline()) {
+  if (callee->force_inline() || callee->should_inline()) {
     // ignore heuristic controls on inlining
+    if (callee->force_inline())
+      CompileTask::print_inlining(callee, scope()->level(), bci(), "force inline by annotation");
   } else {
     if (inline_level() > MaxInlineLevel                         ) INLINE_BAILOUT("too-deep inlining");
     if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("too-deep recursive inlining");
@@ -3531,7 +3533,7 @@
   }
 
 #ifndef PRODUCT
-      // printing
+  // printing
   if (PrintInlining) {
     print_inline_result(callee, true);
   }
--- a/src/share/vm/ci/ciMethod.hpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/ci/ciMethod.hpp	Thu Jul 19 16:17:41 2012 -0700
@@ -160,6 +160,8 @@
   // Code size for inlining decisions.
   int code_size_for_inlining();
 
+  bool force_inline() { return get_methodOop()->force_inline(); }  // forwards the query to the underlying methodOop
+
   int comp_level();
   int highest_osr_comp_level();
 
--- a/src/share/vm/classfile/classFileParser.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/classfile/classFileParser.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -318,6 +318,13 @@
 
 bool inline valid_cp_range(int index, int length) { return (index > 0 && index < length); }
 
+inline Symbol* check_symbol_at(constantPoolHandle cp, int index) {
+  // NULL unless index is in range and names a utf8 entry; the tag is consulted only when in range.
+  const bool is_utf8_entry = valid_cp_range(index, cp->length()) && cp->tag_at(index).is_utf8();
+  if (!is_utf8_entry)  return NULL;
+  return cp->symbol_at(index);
+}
+
 constantPoolHandle ClassFileParser::parse_constant_pool(Handle class_loader, TRAPS) {
   ClassFileStream* cfs = stream();
   constantPoolHandle nullHandle;
@@ -902,6 +909,7 @@
                                              bool* is_synthetic_addr,
                                              u2* generic_signature_index_addr,
                                              typeArrayHandle* field_annotations,
+                                             ClassFileParser::FieldAnnotationCollector* parsed_annotations,
                                              TRAPS) {
   ClassFileStream* cfs = stream();
   assert(attributes_count > 0, "length should be greater than 0");
@@ -1142,12 +1150,14 @@
     bool is_synthetic = false;
     u2 generic_signature_index = 0;
     bool is_static = access_flags.is_static();
+    FieldAnnotationCollector parsed_annotations;
 
     u2 attributes_count = cfs->get_u2_fast();
     if (attributes_count > 0) {
       parse_field_attributes(cp, attributes_count, is_static, signature_index,
                              &constantvalue_index, &is_synthetic,
                              &generic_signature_index, &field_annotations,
+                             &parsed_annotations,
                              CHECK_(nullHandle));
       if (field_annotations.not_null()) {
         if (fields_annotations->is_null()) {
@@ -1173,6 +1183,8 @@
                       signature_index,
                       constantvalue_index,
                       0);
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(field);
 
     BasicType type = cp->basic_type_for_signature_at(signature_index);
 
@@ -1634,12 +1646,158 @@
       name->as_C_string(), _class_name->as_C_string(), sig->as_C_string());
 }
 
+// Step over one annotation starting at buffer[index]; the result is >= limit on any problem.
+int ClassFileParser::skip_annotation(u1* buffer, int limit, int index) {
+  // Layout: annotation := atype:u2 do(nmem:u2) {member:u2 value}
+  //         value      := switch (tag:u1) { ... }
+  index += 4;  // step over atype and nmem
+  if (index >= limit)  return limit;
+  const int nmem = Bytes::get_Java_u2(buffer + index - 2);  // nmem is the u2 just stepped over
+  for (int remaining = nmem; remaining > 0 && index < limit; remaining--) {
+    // each pair is a u2 member name followed by a value
+    index = skip_annotation_value(buffer, limit, index + 2);
+  }
+  return index;
+}
+
+// Skip the annotation value that starts at buffer[index] and return the index just past it.
+// A result >= limit tells the caller to stop: the data is exhausted, truncated, or has a bad tag.
+int ClassFileParser::skip_annotation_value(u1* buffer, int limit, int index) {
+  // value := switch (tag:u1) {
+  //   case B, C, I, S, Z, D, F, J, c: con:u2;
+  //   case e: e_class:u2 e_name:u2;
+  //   case s: s_con:u2;
+  //   case [: do(nval:u2) {value};
+  //   case @: annotation;
+  // }
+  if ((index += 1) >= limit)  return limit;  // read tag
+  u1 tag = buffer[index-1];
+  switch (tag) {
+  case 'B': case 'C': case 'I': case 'S': case 'Z':
+  case 'D': case 'F': case 'J': case 'c': case 's':
+    index += 2;  // skip con or s_con
+    break;
+  case 'e':
+    index += 4;  // skip e_class, e_name
+    break;
+  case '[':
+    {
+      if ((index += 2) >= limit)  return limit;  // read nval
+      int nval = Bytes::get_Java_u2(buffer+index-2);
+      while (--nval >= 0 && index < limit) {
+        index = skip_annotation_value(buffer, limit, index);  // array elements may nest
+      }
+    }
+    break;
+  case '@':
+    index = skip_annotation(buffer, limit, index);
+    break;
+  default:
+    // Bad tag byte: this is malformed class-file input, not a VM bug, so do not assert.
+    return limit;
+  }
+  return index;
+}
+
+// Sift through annotations, looking for those significant to the VM.
+// buffer holds limit bytes of annotation data; each recognized annotation is recorded in coll.
+void ClassFileParser::parse_annotations(u1* buffer, int limit,
+                                        constantPoolHandle cp,
+                                        ClassFileParser::AnnotationCollector* coll,
+                                        TRAPS) {
+  // annotations := do(nann:u2) {annotation}
+  int index = 0;
+  if ((index += 2) >= limit)  return;  // read nann
+  int nann = Bytes::get_Java_u2(buffer+index-2);
+  enum {  // initial annotation layout
+    atype_off = 0,      // utf8 such as 'Ljava/lang/annotation/Retention;'
+    count_off = 2,      // u2   such as 1 (one value)
+    member_off = 4,     // utf8 such as 'value'
+    tag_off = 6,        // u1   such as 'c' (type) or 'e' (enum)
+    e_tag_val = 'e',
+      e_type_off = 7,   // utf8 such as 'Ljava/lang/annotation/RetentionPolicy;'
+      e_con_off = 9,    // utf8 payload, such as 'SOURCE', 'CLASS', 'RUNTIME'
+      e_size = 11,     // end of 'e' annotation
+    c_tag_val = 'c',
+      c_con_off = 7,    // utf8 payload, such as 'I' or 'Ljava/lang/String;'
+      c_size = 9,       // end of 'c' annotation
+    min_size = 6        // smallest possible size (zero members)
+  };
+  while ((--nann) >= 0 && (index-2 + min_size <= limit)) {
+    int index0 = index;
+    index = skip_annotation(buffer, limit, index);
+    u1* abase = buffer + index0;
+    int atype = Bytes::get_Java_u2(abase + atype_off);
+    int count = Bytes::get_Java_u2(abase + count_off);
+    Symbol* aname = check_symbol_at(cp, atype);
+    if (aname == NULL)  break;  // invalid annotation name
+    Symbol* member = NULL;
+    if (count >= 1) {
+      if (index0 + member_off + 2 > limit)  break;  // truncated: only atype and count are known to fit
+      int member_index = Bytes::get_Java_u2(abase + member_off);
+      member = check_symbol_at(cp, member_index);
+      if (member == NULL)  break;  // invalid member name
+    }
+    // Here is where parsing particular annotations will take place.
+    AnnotationCollector::ID id = coll->annotation_index(aname);
+    if (id == AnnotationCollector::_unknown)  continue;
+    coll->set_annotation(id);
+    if (count == 0)   continue;  // no values: the bit is set, so move on
+
+    // For the record, here is how annotation payloads can be collected.
+    // Suppose we want to capture @Retention.value.  Here is how:
+    //if (id == AnnotationCollector::_class_Retention) {
+    //  Symbol* payload = NULL;
+    //  if (count == 1
+    //      && e_size == (index - index0)  // match size
+    //      && e_tag_val == *(abase + tag_off)
+    //      && (check_symbol_at(cp, Bytes::get_Java_u2(abase + e_type_off))
+    //          == vmSymbols::RetentionPolicy_signature())
+    //      && member == vmSymbols::value_name()) {
+    //    payload = check_symbol_at(cp, Bytes::get_Java_u2(abase + e_con_off));
+    //  }
+    //  check_property(payload != NULL,
+    //                 "Invalid @Retention annotation at offset %u in class file %s",
+    //                 index0, CHECK);
+    //  if (payload != NULL) {
+    //      payload->increment_refcount();
+    //      coll->_class_RetentionPolicy = payload;
+    //  }
+    //}
+  }
+}
+
+ClassFileParser::AnnotationCollector::ID ClassFileParser::AnnotationCollector::annotation_index(Symbol* name) {
+  // Map an annotation type descriptor to the ID the VM tracks for it, or _unknown if it
+  // is not tracked or is not permitted at this collector's location.
+  const vmSymbols::SID annotation_sid = vmSymbols::find_sid(name);
+  const bool is_force_inline =
+    (annotation_sid == vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_ForceInline_signature));
+  // @ForceInline is honored on methods only.
+  if (is_force_inline && _location == _in_method)  return _method_ForceInline;
+  return AnnotationCollector::_unknown;
+}
+
+void ClassFileParser::FieldAnnotationCollector::apply_to(FieldInfo* f) {
+  fatal("no field annotations yet");  // annotation_index() yields no IDs for fields, so nothing should reach here
+}
+
+void ClassFileParser::MethodAnnotationCollector::apply_to(methodHandle m) {
+  if (!has_annotation(_method_ForceInline))  return;  // ForceInline is the only ID acted on here
+  m->set_force_inline(true);
+}
+
+void ClassFileParser::ClassAnnotationCollector::apply_to(instanceKlassHandle k) {
+  fatal("no class annotations yet");  // annotation_index() yields no IDs for classes, so nothing should reach here
+}
+
+
 #define MAX_ARGS_SIZE 255
 #define MAX_CODE_SIZE 65535
 #define INITIAL_MAX_LVT_NUMBER 256
 
 // Note: the parse_method below is big and clunky because all parsing of the code and exceptions
-// attribute is inlined. This is curbersome to avoid since we inline most of the parts in the
+// attribute is inlined. This is cumbersome to avoid since we inline most of the parts in the
 // methodOop to save footprint, so we only know the size of the resulting methodOop when the
 // entire method attribute is parsed.
 //
@@ -1730,6 +1888,7 @@
   // stackmap attribute - JDK1.5
   typeArrayHandle stackmap_data;
   u2 generic_signature_index = 0;
+  MethodAnnotationCollector parsed_annotations;
   u1* runtime_visible_annotations = NULL;
   int runtime_visible_annotations_length = 0;
   u1* runtime_invisible_annotations = NULL;
@@ -1956,6 +2115,7 @@
         runtime_visible_annotations_length = method_attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
+        parse_annotations(runtime_visible_annotations, runtime_visible_annotations_length, cp, &parsed_annotations, CHECK_(nullHandle));
         cfs->skip_u1(runtime_visible_annotations_length, CHECK_(nullHandle));
       } else if (PreserveAllAnnotations && method_attribute_name == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = method_attribute_length;
@@ -2144,6 +2304,8 @@
     clear_hashtable(lvt_Hash);
   }
 
+  if (parsed_annotations.has_any_annotations())
+    parsed_annotations.apply_to(m);
   *method_annotations = assemble_annotations(runtime_visible_annotations,
                                              runtime_visible_annotations_length,
                                              runtime_invisible_annotations,
@@ -2322,7 +2484,7 @@
 }
 
 
-void ClassFileParser::parse_classfile_sourcefile_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_sourcefile_attribute(constantPoolHandle cp, TRAPS) {
   ClassFileStream* cfs = stream();
   cfs->guarantee_more(2, CHECK);  // sourcefile_index
   u2 sourcefile_index = cfs->get_u2_fast();
@@ -2331,13 +2493,12 @@
       cp->tag_at(sourcefile_index).is_utf8(),
     "Invalid SourceFile attribute at constant pool index %u in class file %s",
     sourcefile_index, CHECK);
-  k->set_source_file_name(cp->symbol_at(sourcefile_index));
+  set_class_sourcefile(cp->symbol_at(sourcefile_index));
 }
 
 
 
 void ClassFileParser::parse_classfile_source_debug_extension_attribute(constantPoolHandle cp,
-                                                                       instanceKlassHandle k,
                                                                        int length, TRAPS) {
   ClassFileStream* cfs = stream();
   u1* sde_buffer = cfs->get_u1_buffer();
@@ -2345,7 +2506,13 @@
 
   // Don't bother storing it if there is no way to retrieve it
   if (JvmtiExport::can_get_source_debug_extension()) {
-    k->set_source_debug_extension((char*)sde_buffer, length);
+    assert((length+1) > length, "Overflow checking");
+    u1* sde = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, u1, length+1);
+    for (int i = 0; i < length; i++) {
+      sde[i] = sde_buffer[i];
+    }
+    sde[length] = '\0';
+    set_class_sde_buffer((char*)sde, length);
   }
   // Got utf8 string, set stream position forward
   cfs->skip_u1(length, CHECK);
@@ -2361,7 +2528,7 @@
                                                             u2 enclosing_method_class_index,
                                                             u2 enclosing_method_method_index,
                                                             constantPoolHandle cp,
-                                                            instanceKlassHandle k, TRAPS) {
+                                                            TRAPS) {
   ClassFileStream* cfs = stream();
   u1* current_mark = cfs->current();
   u2 length = 0;
@@ -2452,7 +2619,7 @@
   assert(index == size, "wrong size");
 
   // Update instanceKlass with inner class info.
-  k->set_inner_classes(inner_classes());
+  set_class_inner_classes(inner_classes);
 
   // Restore buffer's current position.
   cfs->set_current(current_mark);
@@ -2460,11 +2627,11 @@
   return length;
 }
 
-void ClassFileParser::parse_classfile_synthetic_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
-  k->set_is_synthetic();
+void ClassFileParser::parse_classfile_synthetic_attribute(constantPoolHandle cp, TRAPS) {
+  set_class_synthetic_flag(true);
 }
 
-void ClassFileParser::parse_classfile_signature_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_signature_attribute(constantPoolHandle cp, TRAPS) {
   ClassFileStream* cfs = stream();
   u2 signature_index = cfs->get_u2(CHECK);
   check_property(
@@ -2472,10 +2639,10 @@
       cp->tag_at(signature_index).is_utf8(),
     "Invalid constant pool index %u in Signature attribute in class file %s",
     signature_index, CHECK);
-  k->set_generic_signature(cp->symbol_at(signature_index));
+  set_class_generic_signature(cp->symbol_at(signature_index));
 }
 
-void ClassFileParser::parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, instanceKlassHandle k,
+void ClassFileParser::parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp,
                                                                   u4 attribute_byte_length, TRAPS) {
   ClassFileStream* cfs = stream();
   u1* current_start = cfs->current();
@@ -2547,10 +2714,12 @@
 }
 
 
-void ClassFileParser::parse_classfile_attributes(constantPoolHandle cp, instanceKlassHandle k, TRAPS) {
+void ClassFileParser::parse_classfile_attributes(constantPoolHandle cp,
+                                                 ClassFileParser::ClassAnnotationCollector* parsed_annotations,
+                                                 TRAPS) {
   ClassFileStream* cfs = stream();
   // Set inner classes attribute to default sentinel
-  k->set_inner_classes(Universe::the_empty_short_array());
+  set_class_inner_classes(typeArrayHandle(THREAD, Universe::the_empty_short_array()));
   cfs->guarantee_more(2, CHECK);  // attributes_count
   u2 attributes_count = cfs->get_u2_fast();
   bool parsed_sourcefile_attribute = false;
@@ -2586,10 +2755,10 @@
       } else {
         parsed_sourcefile_attribute = true;
       }
-      parse_classfile_sourcefile_attribute(cp, k, CHECK);
+      parse_classfile_sourcefile_attribute(cp, CHECK);
     } else if (tag == vmSymbols::tag_source_debug_extension()) {
       // Check for SourceDebugExtension tag
-      parse_classfile_source_debug_extension_attribute(cp, k, (int)attribute_length, CHECK);
+      parse_classfile_source_debug_extension_attribute(cp, (int)attribute_length, CHECK);
     } else if (tag == vmSymbols::tag_inner_classes()) {
       // Check for InnerClasses tag
       if (parsed_innerclasses_attribute) {
@@ -2608,7 +2777,7 @@
           "Invalid Synthetic classfile attribute length %u in class file %s",
           attribute_length, CHECK);
       }
-      parse_classfile_synthetic_attribute(cp, k, CHECK);
+      parse_classfile_synthetic_attribute(cp, CHECK);
     } else if (tag == vmSymbols::tag_deprecated()) {
       // Check for Deprecatd tag - 4276120
       if (attribute_length != 0) {
@@ -2623,11 +2792,16 @@
             "Wrong Signature attribute length %u in class file %s",
             attribute_length, CHECK);
         }
-        parse_classfile_signature_attribute(cp, k, CHECK);
+        parse_classfile_signature_attribute(cp, CHECK);
       } else if (tag == vmSymbols::tag_runtime_visible_annotations()) {
         runtime_visible_annotations_length = attribute_length;
         runtime_visible_annotations = cfs->get_u1_buffer();
         assert(runtime_visible_annotations != NULL, "null visible annotations");
+        parse_annotations(runtime_visible_annotations,
+                          runtime_visible_annotations_length,
+                          cp,
+                          parsed_annotations,
+                          CHECK);
         cfs->skip_u1(runtime_visible_annotations_length, CHECK);
       } else if (PreserveAllAnnotations && tag == vmSymbols::tag_runtime_invisible_annotations()) {
         runtime_invisible_annotations_length = attribute_length;
@@ -2661,7 +2835,7 @@
         if (parsed_bootstrap_methods_attribute)
           classfile_parse_error("Multiple BootstrapMethods attributes in class file %s", CHECK);
         parsed_bootstrap_methods_attribute = true;
-        parse_classfile_bootstrap_methods_attribute(cp, k, attribute_length, CHECK);
+        parse_classfile_bootstrap_methods_attribute(cp, attribute_length, CHECK);
       } else {
         // Unknown attribute
         cfs->skip_u1(attribute_length, CHECK);
@@ -2676,7 +2850,7 @@
                                                      runtime_invisible_annotations,
                                                      runtime_invisible_annotations_length,
                                                      CHECK);
-  k->set_class_annotations(annotations());
+  set_class_annotations(annotations);
 
   if (parsed_innerclasses_attribute || parsed_enclosingmethod_attribute) {
     u2 num_of_classes = parse_classfile_inner_classes_attribute(
@@ -2684,7 +2858,7 @@
                             parsed_innerclasses_attribute,
                             enclosing_method_class_index,
                             enclosing_method_method_index,
-                            cp, k, CHECK);
+                            cp, CHECK);
     if (parsed_innerclasses_attribute &&_need_verify && _major_version >= JAVA_1_5_VERSION) {
       guarantee_property(
         inner_classes_attribute_length == sizeof(num_of_classes) + 4 * sizeof(u2) * num_of_classes,
@@ -2698,6 +2872,23 @@
   }
 }
 
+void ClassFileParser::apply_parsed_class_attributes(instanceKlassHandle k) {
+  // Transfer the class attributes recorded during parsing onto k.
+  if (_synthetic_flag) { k->set_is_synthetic(); }
+  // Each symbol handed to k gets its refcount bumped first.
+  if (_sourcefile != NULL) {
+    _sourcefile->increment_refcount();
+    k->set_source_file_name(_sourcefile);
+  }
+  if (_generic_signature != NULL) {
+    _generic_signature->increment_refcount();
+    k->set_generic_signature(_generic_signature);
+  }
+  // The debug extension is set only when one was recorded; the two handles are stored unconditionally.
+  if (_sde_buffer != NULL) { k->set_source_debug_extension(_sde_buffer, _sde_length); }
+  k->set_inner_classes(_inner_classes());
+  k->set_class_annotations(_annotations());
+}
 
 typeArrayHandle ClassFileParser::assemble_annotations(u1* runtime_visible_annotations,
                                                       int runtime_visible_annotations_length,
@@ -2748,8 +2939,7 @@
                             jt->get_thread_stat()->perf_timers_addr(),
                             PerfClassTraceTime::PARSE_CLASS);
 
-  _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false;
-  _max_bootstrap_specifier_index = -1;
+  init_parsed_class_attributes();
 
   if (JvmtiExport::should_post_class_file_load_hook()) {
     // Get the cached class file bytes (if any) from the class that
@@ -2982,6 +3172,13 @@
     objArrayHandle methods_parameter_annotations(THREAD, methods_parameter_annotations_oop);
     objArrayHandle methods_default_annotations(THREAD, methods_default_annotations_oop);
 
+    // Additional attributes
+    ClassAnnotationCollector parsed_annotations;
+    parse_classfile_attributes(cp, &parsed_annotations, CHECK_(nullHandle));
+
+    // Make sure this is the end of class file stream
+    guarantee_property(cfs->at_eos(), "Extra bytes at the end of class file %s", CHECK_(nullHandle));
+
     // We check super class after class file is parsed and format is checked
     if (super_class_index > 0 && super_klass.is_null()) {
       Symbol*  sk  = cp->klass_name_at(super_class_index);
@@ -3470,11 +3667,10 @@
       this_klass->set_has_miranda_methods(); // then set a flag
     }
 
-    // Additional attributes
-    parse_classfile_attributes(cp, this_klass, CHECK_(nullHandle));
-
-    // Make sure this is the end of class file stream
-    guarantee_property(cfs->at_eos(), "Extra bytes at the end of class file %s", CHECK_(nullHandle));
+    // Fill in field values obtained by parse_classfile_attributes
+    if (parsed_annotations.has_any_annotations())
+      parsed_annotations.apply_to(this_klass);
+    apply_parsed_class_attributes(this_klass);
 
     // VerifyOops believes that once this has been set, the object is completely loaded.
     // Compute transitive closure of interfaces this class implements
@@ -3489,6 +3685,7 @@
     // Do final class setup
     fill_oop_maps(this_klass, nonstatic_oop_map_count, nonstatic_oop_offsets, nonstatic_oop_counts);
 
+    // Fill in has_finalizer, has_vanilla_constructor, and layout_helper
     set_precomputed_flags(this_klass);
 
     // reinitialize modifiers, using the InnerClasses attribute
--- a/src/share/vm/classfile/classFileParser.hpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/classfile/classFileParser.hpp	Thu Jul 19 16:17:41 2012 -0700
@@ -31,8 +31,8 @@
 #include "oops/typeArrayOop.hpp"
 #include "runtime/handles.inline.hpp"
 #include "utilities/accessFlags.hpp"
+#include "classfile/symbolTable.hpp"
 
-class TempNewSymbol;
 class FieldAllocationCount;
 
 
@@ -50,11 +50,80 @@
   KlassHandle _host_klass;
   GrowableArray<Handle>* _cp_patches; // overrides for CP entries
 
+  // precomputed flags
   bool _has_finalizer;
   bool _has_empty_finalizer;
   bool _has_vanilla_constructor;
+  int _max_bootstrap_specifier_index;  // detects BSS values
 
-  int _max_bootstrap_specifier_index;
+  // class attributes parsed before the instance klass is created (copied onto it by apply_parsed_class_attributes):
+  bool       _synthetic_flag;       // set when a Synthetic class attribute is parsed
+  Symbol*    _sourcefile;           // SourceFile attribute symbol, or NULL
+  Symbol*    _generic_signature;    // Signature attribute symbol, or NULL
+  char*      _sde_buffer;           // SourceDebugExtension bytes, or NULL
+  int        _sde_length;           // length passed along with _sde_buffer
+  typeArrayHandle _inner_classes;   // InnerClasses data
+  typeArrayHandle _annotations;     // assembled class annotations
+
+  void set_class_synthetic_flag(bool x)           { _synthetic_flag = x; }
+  void set_class_sourcefile(Symbol* x)            { _sourcefile = x; }
+  void set_class_generic_signature(Symbol* x)     { _generic_signature = x; }
+  void set_class_sde_buffer(char* x, int len)     { _sde_buffer = x; _sde_length = len; }
+  void set_class_inner_classes(typeArrayHandle x) { _inner_classes = x; }
+  void set_class_annotations(typeArrayHandle x)   { _annotations = x; }
+  void init_parsed_class_attributes() {
+    _synthetic_flag = false;
+    _sourcefile = _generic_signature = NULL;
+    set_class_sde_buffer(NULL, 0);
+    // The precomputed flags are reset here as well.
+    _has_finalizer = false;
+    _has_empty_finalizer = false;
+    _has_vanilla_constructor = false;
+    _max_bootstrap_specifier_index = -1;
+  }
+  void apply_parsed_class_attributes(instanceKlassHandle k);  // update k
+
+  class AnnotationCollector {  // records, as a bit set, which VM-significant annotations were seen
+  public:
+    enum Location { _in_field, _in_method, _in_class };
+    enum ID {
+      _unknown = 0,          // annotation is not tracked
+      _method_ForceInline,
+      _annotation_LIMIT      // number of IDs; must fit in the bits of _annotations_present
+    };
+    const Location _location;     // fixed at construction
+    int _annotations_present;     // bit nth_bit(id) is set once set_annotation(id) has been called
+    AnnotationCollector(Location location)
+    : _location(location), _annotations_present(0)
+    {
+      assert((int)_annotation_LIMIT <= (int)sizeof(_annotations_present) * BitsPerByte, "");
+    }
+    // If this annotation name has an ID, report it (or _unknown).
+    ID annotation_index(Symbol* name);
+    // Record that the annotation with this ID is present:
+    void set_annotation(ID id) {
+      assert((int)id >= 0 && (int)id < (int)_annotation_LIMIT, "oob");
+      _annotations_present |= nth_bit((int)id);
+    }
+    // Report whether any annotation, or one particular annotation, has been recorded.
+    bool has_any_annotations() { return _annotations_present != 0; }
+    bool has_annotation(ID id) { return (nth_bit((int)id) & _annotations_present) != 0; }
+  };
+  class FieldAnnotationCollector: public AnnotationCollector {  // collector constructed with location _in_field
+  public:
+    FieldAnnotationCollector() : AnnotationCollector(_in_field) { }
+    void apply_to(FieldInfo* f);
+  };
+  class MethodAnnotationCollector: public AnnotationCollector {  // collector constructed with location _in_method
+  public:
+    MethodAnnotationCollector() : AnnotationCollector(_in_method) { }
+    void apply_to(methodHandle m);
+  };
+  class ClassAnnotationCollector: public AnnotationCollector {  // collector constructed with location _in_class
+  public:
+    ClassAnnotationCollector() : AnnotationCollector(_in_class) { }
+    void apply_to(instanceKlassHandle k);
+  };
 
   enum { fixed_buffer_size = 128 };
   u_char linenumbertable_buffer[fixed_buffer_size];
@@ -87,7 +156,9 @@
                               u2* constantvalue_index_addr,
                               bool* is_synthetic_addr,
                               u2* generic_signature_index_addr,
-                              typeArrayHandle* field_annotations, TRAPS);
+                              typeArrayHandle* field_annotations,
+                              FieldAnnotationCollector* parsed_annotations,
+                              TRAPS);
   typeArrayHandle parse_fields(Symbol* class_name,
                                constantPoolHandle cp, bool is_interface,
                                FieldAllocationCount *fac,
@@ -128,25 +199,32 @@
   typeArrayOop parse_stackmap_table(u4 code_attribute_length, TRAPS);
 
   // Classfile attribute parsing
-  void parse_classfile_sourcefile_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_source_debug_extension_attribute(constantPoolHandle cp,
-                                                instanceKlassHandle k, int length, TRAPS);
+  void parse_classfile_sourcefile_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_source_debug_extension_attribute(constantPoolHandle cp, int length, TRAPS);
   u2   parse_classfile_inner_classes_attribute(u1* inner_classes_attribute_start,
                                                bool parsed_enclosingmethod_attribute,
                                                u2 enclosing_method_class_index,
                                                u2 enclosing_method_method_index,
                                                constantPoolHandle cp,
-                                               instanceKlassHandle k, TRAPS);
-  void parse_classfile_attributes(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_synthetic_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_signature_attribute(constantPoolHandle cp, instanceKlassHandle k, TRAPS);
-  void parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, instanceKlassHandle k, u4 attribute_length, TRAPS);
+                                               TRAPS);
+  void parse_classfile_attributes(constantPoolHandle cp,
+                                  ClassAnnotationCollector* parsed_annotations,
+                                  TRAPS);
+  void parse_classfile_synthetic_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_signature_attribute(constantPoolHandle cp, TRAPS);
+  void parse_classfile_bootstrap_methods_attribute(constantPoolHandle cp, u4 attribute_length, TRAPS);
 
   // Annotations handling
   typeArrayHandle assemble_annotations(u1* runtime_visible_annotations,
                                        int runtime_visible_annotations_length,
                                        u1* runtime_invisible_annotations,
                                        int runtime_invisible_annotations_length, TRAPS);
+  int skip_annotation(u1* buffer, int limit, int index);
+  int skip_annotation_value(u1* buffer, int limit, int index);
+  void parse_annotations(u1* buffer, int limit, constantPoolHandle cp,
+                         /* Results (currently, only one result is supported): */
+                         AnnotationCollector* result,
+                         TRAPS);
 
   // Final setup
   unsigned int compute_oop_map_count(instanceKlassHandle super,
--- a/src/share/vm/classfile/javaClasses.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -2738,17 +2738,6 @@
   if (k != NULL) {
     compute_offset(_target_offset, k, vmSymbols::target_name(), vmSymbols::java_lang_invoke_MethodHandle_signature());
   }
-
-  // Disallow compilation of CallSite.setTargetNormal and CallSite.setTargetVolatile
-  // (For C2:  keep this until we have throttling logic for uncommon traps.)
-  if (k != NULL) {
-    instanceKlass* ik = instanceKlass::cast(k);
-    methodOop m_normal   = ik->lookup_method(vmSymbols::setTargetNormal_name(),   vmSymbols::setTarget_signature());
-    methodOop m_volatile = ik->lookup_method(vmSymbols::setTargetVolatile_name(), vmSymbols::setTarget_signature());
-    guarantee(m_normal != NULL && m_volatile != NULL, "must exist");
-    m_normal->set_not_compilable_quietly();
-    m_volatile->set_not_compilable_quietly();
-  }
 }
 
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Jul 19 16:17:41 2012 -0700
@@ -257,6 +257,7 @@
   template(java_lang_invoke_BoundMethodHandle,        "java/lang/invoke/BoundMethodHandle")       \
   template(java_lang_invoke_DirectMethodHandle,       "java/lang/invoke/DirectMethodHandle")      \
   template(java_lang_invoke_CountingMethodHandle,     "java/lang/invoke/CountingMethodHandle")    \
+  template(java_lang_invoke_ForceInline_signature,    "Ljava/lang/invoke/ForceInline;")           \
   /* internal up-calls made only by the JVM, via class sun.invoke.MethodHandleNatives: */         \
   template(findMethodHandleType_name,                 "findMethodHandleType")                     \
   template(findMethodHandleType_signature,       "(Ljava/lang/Class;[Ljava/lang/Class;)Ljava/lang/invoke/MethodType;") \
--- a/src/share/vm/code/vmreg.hpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/code/vmreg.hpp	Thu Jul 19 16:17:41 2012 -0700
@@ -131,6 +131,10 @@
     assert((is_reg() && value() < stack0->value() - 1) || is_stack(), "must be");
     return (VMReg)(intptr_t)(value() + 1);
   }
+  VMReg next(int i) {  // the VMReg whose value is i greater than this one's
+    assert((is_reg() && value() < stack0->value() - i) || is_stack(), "must be");  // for a register, value() + i must stay below stack0
+    return (VMReg)(intptr_t)(value() + i);
+  }
   VMReg prev() {
     assert((is_stack() && value() > stack0->value()) || (is_reg() && value() != 0), "must be");
     return (VMReg)(intptr_t)(value() - 1);
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -546,23 +546,6 @@
     }
   }
 
-  if (is_put && !is_static && klass->is_subclass_of(SystemDictionary::CallSite_klass()) && (info.name() == vmSymbols::target_name())) {
-    const jint direction = frame::interpreter_frame_expression_stack_direction();
-    Handle call_site    (THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at(-1 * direction)));
-    Handle method_handle(THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at( 0 * direction)));
-    assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-    assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
-
-    {
-      // Walk all nmethods depending on this call site.
-      MutexLocker mu(Compile_lock, thread);
-      Universe::flush_dependents_on(call_site, method_handle);
-    }
-
-    // Don't allow fast path for setting CallSite.target and sub-classes.
-    put_code = (Bytecodes::Code) 0;
-  }
-
   cache_entry(thread)->set_field(
     get_code,
     put_code,
--- a/src/share/vm/oops/methodOop.hpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/oops/methodOop.hpp	Thu Jul 19 16:17:41 2012 -0700
@@ -122,8 +122,9 @@
   u2                _max_locals;                 // Number of local variables used by this method
   u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
   u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
-  u1                _jfr_towrite : 1,            // Flags
-                                 : 7;
+  u1                _jfr_towrite  : 1,           // Flags
+                    _force_inline : 1,           // read by force_inline(), written by set_force_inline()
+                                  : 6;           // unnamed bit-field: 6 bits with no flag assigned
   u2                _interpreter_throwout_count; // Count of times method was exited via exception while interpreting
   u2                _number_of_breakpoints;      // fullspeed debugging support
   InvocationCounter _invocation_counter;         // Incremented before each activation of the method - used to trigger frequency-based optimizations
@@ -655,6 +656,9 @@
   bool jfr_towrite()                 { return _jfr_towrite; }
   void set_jfr_towrite(bool towrite) { _jfr_towrite = towrite; }
 
+  bool force_inline()            { return _force_inline; }  // current value of the 1-bit _force_inline flag
+  void set_force_inline(bool fi) { _force_inline = fi; }  // stores fi into the 1-bit _force_inline flag
+
   // On-stack replacement support
   bool has_osr_nmethod(int level, bool match_level) {
    return instanceKlass::cast(method_holder())->lookup_osr_nmethod(this, InvocationEntryBci, level, match_level) != NULL;
--- a/src/share/vm/opto/stringopts.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/opto/stringopts.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -533,7 +533,17 @@
         if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) {
           CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
           if (csj->method() != NULL &&
-              csj->method()->intrinsic_id() == vmIntrinsics::_Integer_toString) {
+              csj->method()->intrinsic_id() == vmIntrinsics::_Integer_toString &&
+              arg->outcnt() == 1) {
+            // _control is the list of StringBuilder call nodes which will
+            // be replaced by the new String code after this optimization.
+            // The Integer::toString() call is not part of the StringBuilder
+            // call chain, so it can be eliminated only if its result is
+            // used solely by this SB call chain.
+            // A further restriction: the result must have exactly one use,
+            // because whether all of its uses belong to this SB call chain
+            // is not known until all related SB call nodes are collected.
+            assert(arg->unique_out() == cnode, "sanity");
             sc->add_control(csj);
             sc->push_int(csj->in(TypeFunc::Parms));
             continue;
--- a/src/share/vm/prims/methodHandles.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/prims/methodHandles.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -3180,17 +3180,15 @@
       jclass MH_class = env->FindClass(MH_name);
       status = env->RegisterNatives(MH_class, invoke_methods, sizeof(invoke_methods)/sizeof(JNINativeMethod));
     }
+    if (!env->ExceptionOccurred()) {
+      status = env->RegisterNatives(MHN_class, call_site_methods, sizeof(call_site_methods)/sizeof(JNINativeMethod));
+    }
     if (env->ExceptionOccurred()) {
       warning("JSR 292 method handle code is mismatched to this JVM.  Disabling support.");
       enable_MH = false;
       env->ExceptionClear();
     }
 
-    status = env->RegisterNatives(MHN_class, call_site_methods, sizeof(call_site_methods)/sizeof(JNINativeMethod));
-    if (env->ExceptionOccurred()) {
-      // Exception is okay until 7087357
-      env->ExceptionClear();
-    }
   }
 
   if (enable_MH) {
--- a/src/share/vm/prims/unsafe.cpp	Tue Jul 17 14:33:17 2012 -0700
+++ b/src/share/vm/prims/unsafe.cpp	Thu Jul 19 16:17:41 2012 -0700
@@ -178,17 +178,6 @@
     v = *(oop*)index_oop_from_field_offset_long(p, offset);                 \
   }
 
-#define GET_OOP_FIELD_VOLATILE(obj, offset, v) \
-  oop p = JNIHandles::resolve(obj);   \
-  volatile oop v;                     \
-  if (UseCompressedOops) {            \
-    volatile narrowOop n = *(volatile narrowOop*)index_oop_from_field_offset_long(p, offset); \
-    v = oopDesc::decode_heap_oop(n);                               \
-  } else {                            \
-    v = *(volatile oop*)index_oop_from_field_offset_long(p, offset);       \
-  } \
-  OrderAccess::acquire();
-
 
 // Get/SetObject must be special-cased, since it works with handles.
 
@@ -296,28 +285,21 @@
 
 UNSAFE_ENTRY(jobject, Unsafe_GetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
   UnsafeWrapper("Unsafe_GetObjectVolatile");
-  GET_OOP_FIELD_VOLATILE(obj, offset, v)
+  oop p = JNIHandles::resolve(obj);  // open-coded replacement for the GET_OOP_FIELD_VOLATILE macro use removed above
+  void* addr = index_oop_from_field_offset_long(p, offset);  // field address is computed once and shared by both branches
+  volatile oop v;
+  if (UseCompressedOops) {
+    volatile narrowOop n = *(volatile narrowOop*) addr;  // read the field as a narrowOop through a volatile pointer
+    v = oopDesc::decode_heap_oop(n);  // decode the narrowOop into an oop
+  } else {
+    v = *(volatile oop*) addr;  // read the field as an oop through a volatile pointer
+  }
+  OrderAccess::acquire();  // called after the field read and before the local handle is created
   return JNIHandles::make_local(env, v);
 UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_SetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
   UnsafeWrapper("Unsafe_SetObjectVolatile");
-  {
-    // Catch VolatileCallSite.target stores (via
-    // CallSite.setTargetVolatile) and check call site dependencies.
-    oop p = JNIHandles::resolve(obj);
-    if ((offset == java_lang_invoke_CallSite::target_offset_in_bytes()) && p->is_a(SystemDictionary::CallSite_klass())) {
-      Handle call_site    (THREAD, p);
-      Handle method_handle(THREAD, JNIHandles::resolve(x_h));
-      assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-      assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
-      {
-        // Walk all nmethods depending on this call site.
-        MutexLocker mu(Compile_lock, thread);
-        Universe::flush_dependents_on(call_site(), method_handle());
-      }
-    }
-  }
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
   void* addr = index_oop_from_field_offset_long(p, offset);